diff --git a/.github/workflows/conda-package-build.yml b/.github/workflows/conda-package-build.yml index adfc7ca..ed7d257 100644 --- a/.github/workflows/conda-package-build.yml +++ b/.github/workflows/conda-package-build.yml @@ -19,7 +19,5 @@ on: jobs: build: uses: openalea/action-build-publish-anaconda/.github/workflows/openalea_ci.yml@main - with: - conda-directory: "./stat_tool/conda" secrets: anaconda_token: ${{ secrets.ANACONDA_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a257a46 --- /dev/null +++ b/.gitignore @@ -0,0 +1,93 @@ +*.py[cod] +.DS_Store +# C extensions +*.so + +# Packages +*.egg +*.egg-info +build +eggs +.eggs +parts +var +sdist +debian/ +develop-eggs +.installed.cfg +lib +lib64 +MANIFEST +**/__pycache__ +**/vpsequence_analysis +CMakeFiles + +# Installer logs +pip-log.txt +npm-debug.log +pip-selfcheck.json + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml +htmlcov +.cache +.pytest_cache +.mypy_cache + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# SQLite +test_exp_framework + +# npm +node_modules/ + +# dolphin +.directory +libpeerconnection.log + +# setuptools +dist + +# IDE Files +atlassian-ide-plugin.xml +.idea/ +*.swp +*.kate-swp +.ropeproject/ + +# Python3 Venv Files +.venv/ +bin/ +include/ +lib/ +lib64 +pyvenv.cfg +share/ +venv/ +.python-version + +# Cython +*.c + +# Emacs backup +*~ + +# VSCode +/.vscode + +# Automatically generated files +docs/preconvert +site/ +out + +# Sphinx +_static diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..4ec9bd4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,193 @@ +# Initialize the CMake project +cmake_minimum_required(VERSION 3.20) + +################################################## +# If we are running in a Conda environment, we automatically +# add the Conda env prefix to the CMAKE_PREFIX_PATH + +if(DEFINED ENV{CONDA_PREFIX}) + list(APPEND CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX}") + 
#TODO: Windows Conda environments are structured differently, + # how unfortunate is this? + list(APPEND CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX}/Library") +endif() + +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include("Anaconda") + +################################################## +# Define the version + +if(SKBUILD) + set(SEQANA_VERSION ${SKBUILD_PROJECT_VERSION}) # same variable that project(VERSION ...) reads below +else() + set(SEQANA_VERSION "2.0.0") +endif() + +project(openalea.sequence_analysis + VERSION ${SEQANA_VERSION} + LANGUAGES CXX + DESCRIPTION "Statistical analysis of plant architecture sequences") + +################################################## +# Set CMake policies for this project + +# We allow _ROOT (env) variables for locating dependencies using find_package +cmake_policy(SET CMP0074 NEW) +# We allow target_sources to convert relative paths to absolute paths +cmake_policy(SET CMP0076 NEW) + # for Python*_FIND_STRATEGY=LOCATION +cmake_policy(SET CMP0094 NEW) +cmake_policy(SET CMP0167 NEW) # NOTE(review): CMP0167 is only known to CMake >= 3.30; with a 3.20 minimum this presumably needs an if(POLICY CMP0167) guard - confirm +################################################## +# Initialize some default paths +# See https://cmake.org/cmake/help/latest/module/GNUInstallDirs.html +include(GNUInstallDirs) + +################################################## +# Set C++ standard and compile options + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +# set(CMAKE_CXX_EXTENSIONS ON) + +# This may be set in pyproject.toml +set(CMAKE_VERBOSE_MAKEFILE ON) + +################################################## +# For Python bindings, we need to enable position-independent code +set(CMAKE_POSITION_INDEPENDENT_CODE ON) # For shared libs + +################################################## +# TODO : REMOVE?
+# if(WIN32) +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -enable-stdcall-fixup -enable-auto-import -enable-runtime-pseudo-reloc -s") +# endif() + + +# RPath settings +set(CMAKE_SKIP_BUILD_RPATH FALSE) +set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) +set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +list( + FIND + CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES + ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} + isSystemDir +) +if("${isSystemDir}" STREQUAL "-1") + set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) +endif() + + +if (WIN32) + string(REGEX REPLACE "/W3" "/W0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # output is the variable NAME, input is its quoted value + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MD") + + # To fix compilation error with vc14 and boost + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DHAVE_SNPRINTF") +endif() + +# Options +option(WITH_EFENCE "Build with efence library" OFF) +option(WITH_TEST "Build tests" OFF) + +################################################## +# Add a library for sequence_analysis and its Python wrappers +add_library(oasequence_analysis SHARED) + +################################################## +# Find external dependencies +find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) +find_package(Boost REQUIRED COMPONENTS python) +# TODO + +find_library(STAT_TOOL_LIB NAMES oastat_tool REQUIRED) # fail at the lookup instead of linking a -NOTFOUND value + +#find_package(openalea.stat_tool REQUIRED) + +################################################## +# Setup lib, inc, defines for C++ oastat_tool lib + +target_link_libraries(oasequence_analysis PUBLIC ${STAT_TOOL_LIB}) + +target_include_directories(oasequence_analysis + PUBLIC + ${Boost_INCLUDE_DIRS} + ${CONDA_ENV}include +) + +# Export symbols on Windows +# if (WIN32 OR MSVC) +# target_compile_definitions(oasequence_analysis PUBLIC STAT_TOOL_MAKEDLL) +# endif() + +# Optionally add efence +if(WITH_EFENCE) + target_compile_definitions(oasequence_analysis PUBLIC DEBUG) +
target_link_libraries(oasequence_analysis PUBLIC efence) +endif() + +# Add files to build +add_subdirectory(src/cpp/sequence_analysis) + + +################################################## +# Add a Python binding library +python_add_library(_sequence_analysis MODULE) + +target_include_directories(oasequence_analysis + PUBLIC + "src/cpp" +) + +target_link_libraries(_sequence_analysis + PRIVATE + ${STAT_TOOL_LIB} + oasequence_analysis + Boost::python + Python::Module + ) + + target_compile_definitions(_sequence_analysis PRIVATE BOOST_ALL_NO_LIB) + + # Control the output name of the produced shared library + set_target_properties(_sequence_analysis PROPERTIES PREFIX "") + set_target_properties(_sequence_analysis PROPERTIES OUTPUT_NAME "_sequence_analysis") + if(WIN32) + set_target_properties(_sequence_analysis PROPERTIES SUFFIX ".pyd") + elseif(APPLE) + set_target_properties(_sequence_analysis PROPERTIES SUFFIX ".so") + endif() + +add_subdirectory("src/wrapper/") + + +if(APPLE) + set_target_properties(_sequence_analysis PROPERTIES INSTALL_RPATH "@loader_path/.") +elseif(UNIX) + set_target_properties(_sequence_analysis PROPERTIES INSTALL_RPATH "$ORIGIN/.") +endif() + +################################################## +# Install targets for the sequence_analysis lib and its Python wrapper +install(TARGETS oasequence_analysis + RUNTIME DESTINATION "${CONDA_ENV}bin/" + LIBRARY DESTINATION "${CONDA_ENV}lib/" + ARCHIVE DESTINATION "${CONDA_ENV}lib/" +) + +install( + TARGETS + _sequence_analysis + DESTINATION "${SKBUILD_PLATLIB_DIR}/openalea/sequence_analysis" +) + +# Optionally handle tests +# TODO TO TEST +if(WITH_TEST) + enable_testing() + add_subdirectory(test/cpp) +endif() + diff --git a/README.md b/README.md new file mode 100644 index 0000000..84c9701 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# Sequence Analysis + +_________________ + +[![Docs](https://readthedocs.org/projects/sequence_analysis/badge/?version=latest)](https://sequence_analysis.readthedocs.io/) +[![Build
Status](https://github.com/openalea/sequence_analysis/actions/workflows/conda-package-build.yml/badge.svg?branch=main)](https://github.com/openalea/sequence_analysis/actions/workflows/conda-package-build.yml?query=branch%3Amain) +[![License](https://img.shields.io/badge/License--CeCILL-C-blue)](https://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html) +[![Anaconda-Server Badge](https://anaconda.org/openalea3/sequence_analysis/badges/version.svg)](https://anaconda.org/openalea3/sequence_analysis) + +_________________ + +[Read Latest Documentation](https://sequence_analysis.readthedocs.io/) - [Browse GitHub Code Repository](https://github.com/openalea/sequence_analysis/) + +_________________ + +**sequence analysis** Basic Statistical tools used by different Structure Analysis libraries. + +### Contributors + +Thanks to all who contribute to making this package what it is! + + + + + diff --git a/README.txt b/README.txt index 0492287..f740a27 100644 --- a/README.txt +++ b/README.txt @@ -1,4 +1,4 @@ -vplants.stat_tool +openalea.sequence_analysis ----------------- Description @@ -31,5 +31,4 @@ qt >= 4.2 (on windows) Dependencies --------------------- -vplants.tool -vplants.stat_tool \ No newline at end of file +openalea.stat_tool diff --git a/SConstruct b/SConstruct deleted file mode 100644 index a3b8f4c..0000000 --- a/SConstruct +++ /dev/null @@ -1,55 +0,0 @@ -# -*-python-*- - -from openalea.sconsx import config, environ -import os, platform - - -pj = os.path.join -ALEASolution = config.ALEASolution - -options = Variables(['../options.py', 'options.py'], ARGUMENTS) -# Firstly get options in ../options.py and then in options.py and finally in ARGUMENTS -options.Add(BoolVariable('with_efence', 'build with efence library', False)) -options.Add(BoolVariable('with_test', 'build with efence library', False)) - - -tools = ['boost_python', 'openalea.stattool'] - -env = ALEASolution(options, tools) -env.AppendUnique(CXXFLAGS=['-x', 'c++', '-std=c++14']) - -if
env['with_efence']: - env.AppendUnique(LIBS=['efence']) - -if (platform.system() != 'Windows' and - os.environ.get('CC') and - os.environ.get('CXX')): - - env.AppendUnique(CFLAGS=["-std=c14"]) - if (platform.system() == 'Darwin'): - env.AppendUnique(CXXFLAGS=['-stdlib=libc++']) - conda_prefix = os.environ['CONDA_PREFIX'] - conda_bin = pj(os.environ['CONDA_PREFIX'], 'bin') - - # To work with conda toolchain - env['AR'] = os.environ['AR'] - env['AS'] = os.environ['AS'] - env['CC'] = pj(conda_bin, os.environ['CC']) - env['CXX'] = pj(conda_bin, os.environ['CXX']) - env.PrependUnique( - CPPPATH=['%s/include'%(conda_prefix)], - CCFLAGS=['-fvisibility=hidden'], - LIBPATH=['%s/lib'%(conda_prefix)]) - -# Build stage -prefix = env['build_prefix'] -seqlib = SConscript(pj(prefix,"src/cpp/SConscript"), - exports='env') -SConscript(pj(prefix,"src/wrapper/SConscript"), - exports='env') - -if bool(env['with_test']): - SConscript(pj(prefix,"test/cpp/SConscript"), exports="env seqlib") - -Default("build") - diff --git a/cmake/Anaconda.cmake b/cmake/Anaconda.cmake new file mode 100644 index 0000000..1835fa4 --- /dev/null +++ b/cmake/Anaconda.cmake @@ -0,0 +1,133 @@ +# Anaconda Check +if (DEFINED ENV{CONDA_PREFIX}) + # Anaconda Environment + message(STATUS "Anaconda environment detected: " $ENV{CONDA_PREFIX}) + + set(CMAKE_INCLUDE_PATH "$ENV{CONDA_PREFIX}/include" ${CMAKE_INCLUDE_PATH}) + set(CMAKE_LIBRARY_PATH "$ENV{CONDA_PREFIX}/lib" ${CMAKE_LIBRARY_PATH}) + + if (DEFINED ENV{PREFIX}) + file(TO_CMAKE_PATH "$ENV{PREFIX}" TMP_CONDA_ENV) # quoted so the path is always passed as one argument + else() + file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}" TMP_CONDA_ENV) + endif() + + if (WIN32) + set(CONDA_ENV "${TMP_CONDA_ENV}/Library/") + else() + set(CONDA_ENV "${TMP_CONDA_ENV}/") + endif() + + set(CONDA_PYTHON_ENV "${TMP_CONDA_ENV}/") + + set(USE_CONDA ON) + +else() + message(STATUS "Compilation outside an anaconda environment.") + set(USE_CONDA OFF) +endif() + + +if (DEFINED ENV{CONDA_BUILD}) + message(STATUS "Conda build detected. 
" $ENV{CONDA_BUILD}) + + if (WIN32) + set(Python_ROOT_DIR "$ENV{PREFIX}") # PREFIX is an environment variable here, as in the rest of this block + endif() + + # specify the cross compiler + #set(CMAKE_C_COMPILER $ENV{CC}) + #set(CMAKE_LINKER $ENV{LD}) + #set(CMAKE_AR $ENV{AR}) + #set(CMAKE_NM $ENV{NM}) + #set(CMAKE_RANLIB $ENV{RANLIB}) + #set(CMAKE_STRIP $ENV{STRIP}) + #set(CMAKE_INSTALL_NAME_TOOL $ENV{INSTALL_NAME_TOOL}) + + #if (APPLE) + # set(CMAKE_OSX_ARCHITECTURES $ENV{OSX_ARCH}) + #endif() + + #set(CMAKE_CXX_COMPILER $ENV{CXX}) + #set(CMAKE_CXX_COMPILER_RANLIB $ENV{RANLIB}) + #set(CMAKE_CXX_COMPILER_AR $ENV{AR}) + + # where is the target environment + set(CMAKE_FIND_ROOT_PATH $ENV{PREFIX} $ENV{BUILD_PREFIX}) + if (APPLE) + list(APPEND CMAKE_FIND_ROOT_PATH $ENV{CONDA_BUILD_SYSROOT} ) + endif() + if (WIN32) + list(APPEND CMAKE_FIND_ROOT_PATH $ENV{BUILD_PREFIX}/Library/usr $ENV{PREFIX}/Library/usr) + set(CMAKE_INCLUDE_PATH "$ENV{BUILD_PREFIX}/Library/usr/include" ${CMAKE_INCLUDE_PATH}) + set(CMAKE_LIBRARY_PATH "$ENV{BUILD_PREFIX}/Library/usr/lib" ${CMAKE_LIBRARY_PATH}) + endif() + if (UNIX) + # I add both old style and new style cdts : https://github.com/conda-forge/cdt-builds#old-stylelegacy-vs-new-style-cdts + list(APPEND CMAKE_FIND_ROOT_PATH $ENV{BUILD_PREFIX}/x86_64-conda-linux-gnu/sysroot $ENV{BUILD_PREFIX}/$ENV{HOST}/sysroot ) + list(APPEND CMAKE_FIND_ROOT_PATH $ENV{PREFIX}/x86_64-conda-linux-gnu/sysroot $ENV{PREFIX}/$ENV{HOST}/sysroot ) + + link_directories($ENV{BUILD_PREFIX}/x86_64-conda-linux-gnu/sysroot/lib64 $ENV{BUILD_PREFIX}/$ENV{HOST}/sysroot/lib64) + link_directories($ENV{BUILD_PREFIX}/x86_64-conda-linux-gnu/sysroot/lib $ENV{BUILD_PREFIX}/$ENV{HOST}/sysroot/lib) + link_directories($ENV{BUILD_PREFIX}/x86_64-conda-linux-gnu/sysroot/usr/lib64 $ENV{BUILD_PREFIX}/$ENV{HOST}/sysroot/usr/lib64) + link_directories($ENV{BUILD_PREFIX}/x86_64-conda-linux-gnu/sysroot/usr/lib $ENV{BUILD_PREFIX}/$ENV{HOST}/sysroot/usr/lib) + endif() + + message(STATUS "CMAKE_FIND_ROOT_PATH :") + foreach(dir ${CMAKE_FIND_ROOT_PATH}) + message(STATUS "
- " ${dir}) + endforeach() + + # search for programs in the build host directories + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) + # for libraries and headers in the target directories + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + + set(USE_CONDA_BUILD ON) +else() + set(USE_CONDA_BUILD OFF) +endif() + +function(oa_default_install) # choose the default CMAKE_INSTALL_PREFIX: conda-build PREFIX, else the active conda env, else the source-tree build/ dir + if(USE_CONDA_BUILD) + set(CMAKE_INSTALL_PREFIX $ENV{PREFIX} CACHE PATH "..." FORCE) + elseif(USE_CONDA) # an empty elseif() never matches, which left this branch unreachable + set(CMAKE_INSTALL_PREFIX ${CONDA_ENV} CACHE PATH "..." FORCE) + else() + set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/build CACHE PATH "..." FORCE) + endif() + message(STATUS "Default install prefix to " ${CMAKE_INSTALL_PREFIX}) +endfunction() + +if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + oa_default_install() +elseif (NOT DEFINED CMAKE_INSTALL_PREFIX) + oa_default_install() +else() + message(STATUS "Install Prefix: " ${CMAKE_INSTALL_PREFIX}) +endif() + +function(install_share sharedirectory project) # install the content of sharedirectory (relative to the current source dir) under share/project + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${sharedirectory}/ DESTINATION "${CONDA_ENV}share/${project}") # CONDA_ENV already ends with "/" +endfunction() + + +function(install_oalib libname) # install target libname: runtime files to bin/, libraries and archives to lib/ + message("Installing ${libname} in ${CONDA_ENV}lib/") + install(TARGETS ${libname} + RUNTIME DESTINATION "${CONDA_ENV}bin/" + LIBRARY DESTINATION "${CONDA_ENV}lib/" + ARCHIVE DESTINATION "${CONDA_ENV}lib/" + ) +endfunction() + +function(install_oabin libname) # install the runtime files of target libname to bin/ + message("Installing ${libname} in ${CONDA_ENV}bin/") + install(TARGETS ${libname} RUNTIME DESTINATION "${CONDA_ENV}bin/") +endfunction() + +function(install_oaheaders directory exclude) # install *.h and *.hpp from directory to include/, skipping paths that match exclude + message("Installing header from ${directory} in ${CONDA_ENV}include/") + install(DIRECTORY ${directory} DESTINATION "${CONDA_ENV}include/" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" PATTERN ${exclude} EXCLUDE) +endfunction() diff --git a/conda/environment.yml b/conda/environment.yml new file mode 100644 index 0000000..6b13b67 --- /dev/null +++ b/conda/environment.yml @@ -0,0
+1,13 @@ +name: sequence_analysis_dev +channels: + - conda-forge + - openalea3 +dependencies: + - python + - pip + - boost + - matplotlib-base + - openalea.stat_tool + - pip: + - -e '..[test,dev,doc]' + diff --git a/conda/meta.yaml b/conda/meta.yaml index bf469f3..38ba187 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,35 +1,76 @@ + +{% set pyproject = load_file_data('../pyproject.toml', from_recipe_dir=True) %} +{% set name = pyproject.get('project').get('name') %} +{% set description = pyproject.get('project').get('description') %} +{% set version = environ.get('SETUPTOOLS_SCM_PRETEND_VERSION', "0.0.0.dev") %} +{% set license = pyproject.get('project').get('license') %} +{% set home = pyproject.get('project', {}).get('urls', {}).get('Homepage', '') %} +{% set build_deps = pyproject.get("build-system", {}).get("requires", []) %} +{% set deps = pyproject.get('project', {}).get('dependencies', []) %} +{% set conda_deps = pyproject.get('tool', {}).get('conda', {}).get('environment', {}).get('dependencies',[]) %} +{% set test_deps = pyproject.get('project', {}).get('optional-dependencies', {}).get('test',[]) %} + + + package: - name: openalea.sequence_analysis - version: 2.0.0 + name: {{ name }} + version: {{ version }} + source: path: .. -about: - home: http://openalea.gforge.inria.fr - license: GPL/LGPL - summary: Models and algorithms for sequence analysis - -# needs boost - this is now in defaults. the ioos boost is broken (and has been -# deprecated but still appears in the channel - 2015-08-02) build: - preserve_egg_dir: True number: 0 - script: python setup.py install --prefix=$PREFIX + preserve_egg_dir: True + script: + - {{ PYTHON }} -m pip install . 
--no-deps --ignore-installed --no-build-isolation -vv requirements: - # it doesn't really require python, but maybe that will fix - # the missing bin directory + host: + - python + {% for dep in build_deps %} + - {{ dep }} + {% endfor %} + - cmake + - make # [not win] + - openalea.stat_tool + - boost + build: - - python - - setuptools - - openalea.deploy - - scons - - openalea.sconsx - - boost - - openalea.stat_tool + - {{ compiler("cxx") }} + run: - - python - - boost - - openalea.stat_tool + - python + {% for dep in deps + conda_deps %} + - {{ dep }} + {% endfor %} + + + + +test: + imports: + - openalea.sequence_analysis + requires: + + {% for dep in test_deps %} + - {{ dep }} + {% endfor %} + + source_files: + - test/ + commands: + - pytest + + +about: + home: {{ home }} + summary: {{ description }} + license: {{ license }} + + +extra: + recipe-maintainers: + - Jean-Baptiste Durand diff --git a/doc/Makefile b/doc/Makefile index 854550e..8b08d3a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,98 +2,247 @@ # # You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXATUOBUILD = sphinx-autobuild +PAPER = +BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest - +.PHONY: help help: @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - + @echo " html to make standalone HTML files" + @echo " rtdhtml Build html using same settings used on ReadtheDocs" + @echo " livehtml Make standalone HTML files and rebuild the documentation when a change is detected. Also includes a livereload enabled web server" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " epub3 to make an epub3" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes 
to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + @echo " dummy to check syntax errors of document sources" + +.PHONY: clean clean: - -rm -rf $(BUILDDIR)/* + rm -rf $(BUILDDIR)/* + rm -rf generated/* + rm -rf auto_gallery/ +.PHONY: html html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." +.PHONY: rtdhtml +rtdhtml: + $(SPHINXBUILD) -T -j auto -E -W --keep-going -b html -d $(BUILDDIR)/doctrees -D language=en . $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + + +.PHONY: livehtml +livehtml: + # @echo "$(SPHINXATUOBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html" + $(SPHINXATUOBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + +.PHONY: dirhtml dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: html-noplot +html-noplot: + $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +.PHONY: pickle pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." +.PHONY: json json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." 
+.PHONY: htmlhelp htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." +.PHONY: qthelp qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/MAppleT.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/sequence_analysis.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/MAppleT.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/sequence_analysis.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/sequence_analysis" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/sequence_analysis" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." +.PHONY: epub3 +epub3: + $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 + @echo + @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." + +.PHONY: latex latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." -pdf: latex - cd $(BUILDDIR)/latex; - make all-ps - cd ../.. +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + $(MAKE) -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." +.PHONY: changes changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes."
+.PHONY: linkcheck linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." +.PHONY: doctest doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." +.PHONY: coverage coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage" + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + +.PHONY: dummy +dummy: + $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy + @echo + @echo "Build finished. Dummy builder generates no files." diff --git a/doc/api.md b/doc/api.md new file mode 100644 index 0000000..e1a4b56 --- /dev/null +++ b/doc/api.md @@ -0,0 +1,8 @@ +# API reference + +```{toctree} +:maxdepth: 2 +:hidden: + + +``` diff --git a/doc/conf.py b/doc/conf.py index 8767aae..8d9930a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,290 +1,166 @@ # -*- coding: utf-8 -*- -# -# sequence_analysis documentation build configuration file, created by -# sphinx-quickstart on Fri Oct 6 12:50:27 2017. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. 
- -import sys import os -from os.path import join as pj +import sys +from importlib.metadata import metadata + +pkg_name = "sequence_analysis" +meta = metadata("openalea." + pkg_name) +release = meta.get("version") +# for example take major/minor +version = ".".join(release.split(".")[:3]) +author = meta["Author"].split(",")[0] + "et al." +desc = meta["Summary"] # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath("..")) # to include the root of the package # -- General configuration ------------------------------------------------ -f = pj(os.path.dirname(__file__),'..','src', 'openalea', 'sequence_analysis','__version__.py') -d = {} -exec(compile(open(f, "rb").read(), f, 'exec'),d,d) -version= d['SEQUENCE_ANALYSIS_VERSION_STR'] - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
-extensions = [] +extensions = [ + "sphinx.ext.autodoc", # support for automatic inclusion of docstring + "sphinx.ext.autosummary", # generates autodoc summaries + "sphinx.ext.doctest", # inclusion and testing of doctest code snippets + "sphinx.ext.intersphinx", # support for linking to other projects + "sphinx.ext.imgmath", # support for math equations + "sphinx.ext.ifconfig", # support for conditional content + "sphinx.ext.viewcode", # support for links to source code + "sphinx.ext.coverage", # includes doc coverage stats in the documentation + "sphinx.ext.todo", # support for todo items + "sphinx.ext.napoleon", # support for numpy and google style docstrings + "sphinx_favicon", # support for favicon + "nbsphinx", # for integrating jupyter notebooks + "myst_parser", # for parsing .md files +] -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +nbsphinx_thumbnails = { + "examples/Segmentation": "_static/segmentation_thumb.png", +} + +nbsphinx_allow_errors = True +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] +autosummary_generate = True +exclude_patterns = ["_build", "_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} # The master toctree document. -master_doc = 'index' - +master_doc = "index" # General information about the project. -project = 'L-Py' -copyright = 'J.-B. Durand for Cirad-Inria-Inra under Cecill-C license' -author = 'J.-B. Durand, Y. Guedon et al.' 
- +project = pkg_name +copyright = "Cecill-C INRAE / INRIA / CIRAD" +author = author # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # -# The short X.Y version. -#version = u'1' -# The full version, including alpha/beta/rc tags. -release = '0' - # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ['_build'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - +language = "en" # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = False - # -- Options for HTML output ---------------------------------------------- - # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -#html_theme = 'classic' -#html_theme = "sphinx_rtd_theme" -#html_theme_path = ["_themes", ] - +html_theme = "pydata_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - +html_theme_options = { + "header_links_before_dropdown": 6, + "sidebarwidth": 200, + "collapse_navigation": "false", + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/openalea/sequence_analysis", + "icon": "fa-brands fa-github", + }, + ], + "show_version_warning_banner": True, + "footer_start": ["copyright"], + "footer_center": ["sphinx-version"], + "secondary_sidebar_items": { + "**/*": ["page-toc", "edit-this-page", "sourcelink"], + "examples/no-sidebar": [], + }, +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-#html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -#html_extra_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - +html_static_path = ["_static"] +html_logo = "_static/openalea_web.svg" +html_favicon = "_static/openalea_web.svg" # If false, no module index is generated. -#html_domain_indices = True - +html_domain_indices = True # If false, no index is generated. -#html_use_index = True - +html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False - +html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - +html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - +html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. 
-# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -#html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' - +html_show_copyright = True # Output file base name for HTML help builder. -htmlhelp_basename = 'sequence_analysis' +htmlhelp_basename = "sequence_analysis_documentation" # -- Options for LaTeX output --------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). +latex_elements = {} latex_documents = [ - (master_doc, 'sequence_analysis.tex', 'sequence_analysis Documentation', - author, 'manual'), + ( + master_doc, + "sequence_analysis.tex", + "sequence_analysis Documentation", + "INRA / INRIA / CIRAD", + "manual", + ), ] -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. 
-#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - # -- Options for manual page output --------------------------------------- - # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'sequence_analysis', 'sequence_analysis Documentation', - [author], 1) + ( + master_doc, + "openalea.sequence_analysis", + "sequence_analysis Documentation", + [author], + 1, + ) ] -# If true, show URL addresses after external links. -#man_show_urls = False - - # -- Options for Texinfo output ------------------------------------------- - # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'L-Py', 'L-Py Documentation', - author, 'L-Py', ' L-Py is a simulation software that mixes L-systems construction with the Python high-level modeling language.', - 'Miscellaneous'), + ( + master_doc, + "sequence_analysis", + "sequence_analysis Documentation", + author, + "sequence_analysis", + "Anaylsis of sequences in tree architecture.", + "Miscellaneous", + ), ] +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {"python": ("https://docs.python.org/", None)} -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. 
-#texinfo_no_detailmenu = False diff --git a/doc/extra.md b/doc/extra.md new file mode 100644 index 0000000..ea0a4ba --- /dev/null +++ b/doc/extra.md @@ -0,0 +1,23 @@ +# Additional resources for stat_tool + +```{include} ../CONTRIBUTING.md + :start-after: + :end-before +``` + +```{include} ../AUTHORS.md + :start-after: + :end-before +``` + +## License + +```{include} ../LICENSE + :start-after: + :end-before +``` + +```{include} ../CHANGELOG.md + :start-after: + :end-before +``` diff --git a/doc/fig7_1.png b/doc/fig7_1.png new file mode 100644 index 0000000..d14f933 Binary files /dev/null and b/doc/fig7_1.png differ diff --git a/doc/fig7_2.png b/doc/fig7_2.png new file mode 100644 index 0000000..440aa94 Binary files /dev/null and b/doc/fig7_2.png differ diff --git a/doc/index.md b/doc/index.md new file mode 100644 index 0000000..803e0d3 --- /dev/null +++ b/doc/index.md @@ -0,0 +1,18 @@ +# stat_tool + +## Official documentation for openalea.stat_tool + +```{toctree} +:maxdepth: 2 +:hidden: + +Home +Install +Getting started +Usage +API Reference +More +``` + +```{include} ../README.md +``` diff --git a/doc/installation.md b/doc/installation.md new file mode 100644 index 0000000..5e718ea --- /dev/null +++ b/doc/installation.md @@ -0,0 +1,45 @@ +# Installation + +You must use a conda environment: + +## Users + +### Create a new environment with stat_tool installed in there + +```bash + +mamba create -n stat_tool -c openalea3 -c conda-forge openalea.stat_tool +mamba activate stat_tool +``` + +Install stat_tool in an existing environment + +```bash +mamba install -c openalea3 -c conda-forge openalea.stat_tool +``` + +### (Optional) Test your installation + +```bash +mamba install -c conda-forge pytest +git clone https://github.com/openalea/stat_tool.git +cd stat_tool/test; pytest +``` + +## Developers + +### Install From source + +```bash +# Install dependency with conda +mamba env create -n stat_tool -f conda/environment.yml +mamba activate stat_tool + +# Clone stat_tool and 
install +git clone https://github.com/openalea/stat_tool.git +cd stat_tool +pip install . + +# (Optional) Test your installation +cd test; pytest +``` diff --git a/doc/usage.md b/doc/usage.md new file mode 100644 index 0000000..2f934ea --- /dev/null +++ b/doc/usage.md @@ -0,0 +1,6 @@ +# Usage + +```{nbgallery} +examples/example1.ipynb +examples/example2.ipynb +``` diff --git a/doc/user/visualea_beech1.rst b/doc/user/visualea_beech1.rst new file mode 100644 index 0000000..55c44fb --- /dev/null +++ b/doc/user/visualea_beech1.rst @@ -0,0 +1,19 @@ +Beech demo +=========== +.. sectionauthor:: Thomas Cokelaer + +.. dataflow:: Demo.ChangePoint_stat_tool beech1 + :width: 50% + + test + + + +.. plot:: + :width: 40% + + from openalea.core.alea import * + pm = PackageManager() + run_and_display(('Demo.ChangePoint_stat_tool', 'beech1'),{},pm=pm) + + diff --git a/doc/user/visualea_oak_demo.rst b/doc/user/visualea_oak_demo.rst new file mode 100644 index 0000000..404abdb --- /dev/null +++ b/doc/user/visualea_oak_demo.rst @@ -0,0 +1,20 @@ +OAK demo +========= + +.. sectionauthor:: Thomas Cokelaer + + +.. dataflow:: Demo.ChangePoint_stat_tool oak_demo + :width: 50% + + test + + +.. 
plot:: + :width: 40% + + from openalea.core.alea import * + pm = PackageManager() + run_and_display(('Demo.ChangePoint_stat_tool', 'oak_demo'),{},pm=pm) + + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4b9c701 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,111 @@ +[build-system] +requires = [ + "scikit-build-core", + "setuptools_scm", +] +build-backend = "scikit_build_core.build" + +[tool.setuptools] +include-package-data = false # force explicit declaration of data (disable automatic inclusion) + +[tool.setuptools.package-data] +"openalea.sequence_analysis" = ["data/**/*"] + +# enable dynamic version based on git tags +[tool.setuptools_scm] +fallback_version = "1.4.0" +version_scheme = "guess-next-dev" +local_scheme = "no-local-version" + +[tool.scikit-build] +build-dir = "./build/" +metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" +logging.level = "WARNING" +build.verbose = true +sdist.include = ["*.so", "*.dylib", "*.dll", "*.pyd", "*.lib"] +editable.rebuild = false +experimental = false +search.site-packages = false + +[tool.scikit-build.cmake] +build-type = "Release" +source-dir = "." 
+ +[tool.scikit-build.wheel.packages] +"openalea/sequence_analysis" = "src/openalea/sequence_analysis" +"openalea/seqint" = "src/openalea/seqint" + +[project] +name = "openalea.sequence_analysis" +authors = [ + { name = "Yann Guédon" }, + { name = "Jean-Baptiste Durand" }, + { name = "Thomas Cokelaer" }, + { name = "Christophe Pradal"}, + { name = "Thomas Arsouze" }, +] +description = "Statistical analysis of plant architecture sequences" +readme = "README.md" +license = "GPL-2.0" +license-files = ["LICEN[CS]E*"] +requires-python = ">=3.10" +dynamic = ["version"] +classifiers = [ + "Development Status :: 1 - Planning", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering", +] + +# you can list here all dependencies that are pip-installable, and that have a name identical to the one used by conda (to allow reuse of this list in meta.yaml) +# If conda name is different, please do not declare the pip name, and declare conda name in the next section +dependencies = [ +] # == install_requires + +[project.optional-dependencies] +test = [ + "pytest", + "nbmake", +] +dev = [ + "pytest >=6", + "pytest-cov >=3", +] +doc = [ + "sphinx-autobuild", + "pydata-sphinx-theme", + "myst-parser", + "sphinx-favicon", + "ipykernel", + "sphinx-copybutton", + "ipython_genutils", + "nbsphinx", +] + +# section specific to conda-only distributed package (not used by pip yet) +[tool.conda.environment] +channels = [ + "openalea3", + "conda-forge" +] +dependencies = [ + "boost", + "matplotlib-base", + "openalea.stat_tool", +] + +[project.urls] +Repository = 
"https://github.com/openalea/sequence_analysis" +Homepage = "https://sequence_analysis.readthedocs.io/" +"Bug Tracker" = "https://github.com/openalea/sequence_analysis/issues" +Discussions = "https://github.com/openalea/sequence_analysis/discussions" +Changelog = "https://github.com/openalea/sequence_analysis/releases" + diff --git a/result b/result new file mode 100644 index 0000000..c0947eb --- /dev/null +++ b/result @@ -0,0 +1,22 @@ +value 95 96 97 +sample size 46 46 46 +mean 2.08696 1.19565 1.63043 +variance 0.170048 0.205314 0.282609 +standard deviation 0.412369 0.453116 0.53161 +mean absolute deviation 0.202268 0.323251 0.493384 +coefficient of concentration 0.0584239 0.137945 0.15913 +coefficient of skewness 2.64171 2.3056 -0.08945 +coefficient of kurtosis 9.96979 4.16166 -1.08981 + + frequency distribution 95 frequency distribution 96 frequency distribution 97 cumulative distribution 95 function cumulative distribution 96 function cumulative distribution 97 function +0 0 0 0 0 0 0 +1 1 38 18 0.0217391 0.826087 0.391304 +2 41 7 27 0.913043 0.978261 0.978261 +3 3 1 1 0.978261 1 1 +4 1 1 + +Kruskal-Wallis test +chi-square test 2 degrees of freedom +chi-square value 58.1932 critical probability 2.30942e-13 +reference chi-square value 5.99146 reference critical probability 0.05 +reference chi-square value 9.21034 reference critical probability 0.01 diff --git a/sequence_analysis.py b/sequence_analysis.py deleted file mode 100644 index 13d30e0..0000000 --- a/sequence_analysis.py +++ /dev/null @@ -1,287 +0,0 @@ -"""Classes shared by most statistical modules -""" - - -import cstat_tool, os - -DistributionIdentifier=cstat_tool.DistributionIdentifier - -Parametric=cstat_tool._Parametric -Distribution=cstat_tool.Distribution -D_DEFAULT=cstat_tool.D_DEFAULT() -I_DEFAULT=cstat_tool.I_DEFAULT() -SELF_TRANSITION=cstat_tool.SELF_TRANSITION() -RestorationAlgorithm=cstat_tool.RestorationAlgorithm - -class _PlotManager: - """Manage the graphical outputs using Gnuplot.py.""" - 
def __init__(self, file_list, ref_prefix, nb_windows=1): - """Initialize a PlotManager. - - Argument file_list refers to the entire set of files generated by - the Plot command, ref_prefix to the prefix of the particular file - to be drawn and nb_windows to the number of graphs for that file.""" - import Gnuplot, sys, os - # print file_list - g=Gnuplot.Gnuplot() - self.__plot=g - cfile=open(ref_prefix+'.plot','r') - c_commands=cfile.readlines() - cfile.close() - py_commands= [] - l=[] - # extract the successive commands - for c in c_commands: - if 'pause' in c: - py_commands.append(l) - l= [] - continue - l.append(c) - gcommands=[ "".join(c) for c in py_commands] - cont=True - if nb_windows==1: - # one single frame to be printed - prompt='' - else: - prompt=': continue, ' - prompt+='

: print, : suspend, : quit\n' - frame=0 - sys.stderr.write(prompt) - while cont: - g(gcommands[frame]) - choice=sys.stdin.read(1) - if choice=='q': - cont=False - self.__plot=None - elif ((choice=='\n') and not(nb_windows==1)): - frame+=1 - if frame==len(gcommands): - frame=0 - elif choice=='s': - cont=False - elif choice=='p': - cont=False - self.__plot=None - cfile=open(ref_prefix+'.print','r') - g=Gnuplot.Gnuplot() - c_commands=cfile.read() - cfile.close() - # print c_commands - c_commands=self.__replacestr(c_commands, 'postscript', - 'postscript color') - # similar replacement can be performed for the file name - # seek for keyword "set output" - strseek="SET OUTPUT" - pos=c_commands.upper().find(strseek) - if pos != -1: - endlpos=c_commands.find("\n",pos) - file_name=c_commands[pos+len(strseek):endlpos] - print "graph printed to" + str(file_name) + "\n" - g(c_commands) - del g - for tmpfile in file_list: - os.remove(os.getcwd()+os.sep+tmpfile) - - def __replacestr(self, message, string, subst): - # replace string by subst in message - index=0 - while index < len(message): - i=message.find(string, index) - if (i==-1) or (i+len(string)+1 >= len(message)): - # subchain string has not been found or this is the last word - return message - else: - index=i+1 - message=message[0:i]+str(subst) \ - +message[i+len(string):len(message)] - return message - -class FormatError(Exception): - """Exceptions related to the statistical modules.""" - - def __init__(self, error=None): - """Initialize a FormatError exception.""" - if error is None: - self.__error="" - else: - self.__error=error - - def _error(self): - return str(self.__error) - - def __str__(self): - return str(self.__error) - -class Histogram: - """Histograms.""" - - def __init__(self, histogram): - """Initialize an Histogram by copy.""" - if issubclass(histogram.__class__, Histogram): - # histogram is supposed to be a Histogram... 
- self.__histo=cstat_tool.Histogram(histogram.__histo) - elif issubclass(histogram.__class__, cstat_tool.Histogram): - # ... or a cstat_tool.Histogram... - self.__histo=cstat_tool.Histogram(histogram) - elif issubclass(histogram.__class__, cstat_tool.Distribution): - # ... or a cstat_tool.Distribution... - self.__histo=cstat_tool.Histogram(histogram) - else: - # ... or a sample of int - try: - chisto=cstat_tool.Histogram(histogram) - except RuntimeError, error: - raise FormatError, error - else: - self.__histo=chisto - - def Display(self, Detail=None, ViewPoint=None): - """Display the Histogram using an ASCII output. - - Usage: Display(ViewPoint="Survival") - Display(Detail=2)""" - if ViewPoint is None: - # Display(Detail=2) - if Detail is None: - Detail=1 - if Detail==1: - exhaustive=False - elif Detail==2: - exhaustive=True - elif type(Detail)!=int: - msg="Bad type for 'Detail' argument:"+str(type(Detail)) \ - +" - expecting type 'int'" - raise TypeError, msg - else: - msg="Bad value for 'Detail' argument:"+str(Detail) \ - +" - expecting 1 or 2" - raise ValueError, msg - try: - # s=cstat_tool.Histogram.display(self.__histo, exhaustive) - s=self.__histo.display(exhaustive) - except RuntimeError, f: - raise FormatError, f - print s - else: - # Display(ViewPoint="Survival") - if Detail is None: - if type(ViewPoint)!=str: - msg="bad type for 'ViewPoint' argument:" + \ - str(type(ViewPoint)) + " - expecting type 'str'" - raise TypeError, msg - elif ViewPoint.upper()!="SURVIVAL": - msg="Bad value for 'ViewPoint' argument:" + str(Detail) \ - +" - expecting 'Survival'" - raise ValueError, msg - try: - s=self.__histo.display_survival() - except RuntimeError, f: - raise FormatError, f - print s - else: - msg="Display must be used with either 'Detail' or with " + \ - "'ViewPoint' parameter, not both" - raise ValueError, msg - - def Plot(self, ViewPoint=None, Title=""): - """Graphical output of the Histogram using Gnuplot.py. 
- - Usage: Plot(Title="Any Title") - Plot(ViewPoint="Survival", Title="Any Title")""" - if not(ViewPoint is None): - if type(ViewPoint)!=str: - msg="bad type for 'ViewPoint' argument:"+str(type(ViewPoint)) \ - +" - expecting type 'str'" - raise TypeError, msg - elif ViewPoint.upper()!="SURVIVAL": - msg="Bad value for 'ViewPoint' argument:"+str(Detail) \ - +" - expecting 'Survival'" - raise ValueError, msg - import os - prefix="ftmp" - file_created=False - # find a non existing file name - while not file_created: - try: - cfile=open(prefix+'.plot','r') - except IOError: - # file does not exist - # file_list= [prefix+extension for extension in \ - # [".plot", "1.dat", "0.dat", ".print"]] - file_created=True - else: - import random - prefix+=str(random.randint(1,9)) - try: - file_list=[] - if ViewPoint is None: - self.__histo.plot_write(os.getcwd()+os.sep+prefix, Title) - else: - self.__histo.plot_write_survival(os.getcwd()+os.sep+prefix, - Title) - # build the list of the files actually created: - for var in range(3): - filename=prefix+str(var) - try: - tmpfile=open(filename+'.dat', 'r') - except IOError: - pass - else: - tmpfile.close() - # add the .dat file - file_list+=[filename+'.dat'] - file_list+=[prefix+extension - for extension in [".plot", ".print"]] - except RuntimeError, f: - for tmpfile in file_list: - os.remove(os.getcwd()+os.sep+tmpfile) - raise FormatError, f - else: - if ViewPoint==None: - nb_windows=1 - else: - nb_windows=3 - self.__plot=_PlotManager(file_list, prefix, nb_windows) - - def Save(self, file_name, Format="ASCII", Detail=2): - """Save the Histogram into a file, using "ASCII" or - "SPREADSHEET" format and a level of detail 1 or 2. 
- - Usage: Save("my_filename.txt", Format="ASCII", Detail=2) - Save("my_filename.txt", "Spreadsheet")""" - if not (Format.upper()=="ASCII" - or Format.upper()=="SPREADSHEET"): - msg="unknown file format: "+str(format) - raise ValueError, msg -## elif not (ViewPoint.upper()=="DATA" -## or ViewPoint.upper()=="SURVIVAL"): -## msg="unknown viewpoint: "+str(format) -## raise ValueError, msg - if Detail==1: - exhaustive=False - elif Detail==2: - exhaustive=True - elif type(Detail)==int: - msg="invalid level of detail: "+str(Detail) - raise ValueError, msg - else: - msg="invalid type for detail: "+str(type(Detail)) - raise TypeError, msg - if Format.upper()=="ASCII": - try: - self.__histo.ascii_write(file_name, exhaustive) - except RuntimeError, error: - raise FormatError, error - else: - try: - self.__histo.spreadsheet_write(file_name)#, exhaustive) - except RuntimeError, error: - raise FormatError, error - - def _chisto(self): - return(self.__histo) - - def __str__(self): - return str(self.__histo) - - diff --git a/src/cpp/sequence_analysis/CMakeLists.txt b/src/cpp/sequence_analysis/CMakeLists.txt new file mode 100644 index 0000000..9866392 --- /dev/null +++ b/src/cpp/sequence_analysis/CMakeLists.txt @@ -0,0 +1,20 @@ +# Glob includes and sources +file(GLOB HEADERS "*.h" "*.hpp") +file(GLOB SOURCES "*.cpp") + + +target_include_directories(oasequence_analysis PUBLIC .) 
+ +target_sources( + oasequence_analysis + PRIVATE + ${SOURCES} +) + +# Add the headers to be installed with the library + +# Add the headers to be installed with the library +install( + FILES ${HEADERS} + DESTINATION "${CONDA_ENV}include/sequence_analysis" +) \ No newline at end of file diff --git a/src/cpp/sequence_analysis/alignment.cpp b/src/cpp/sequence_analysis/alignment.cpp new file mode 100644 index 0000000..5a63262 --- /dev/null +++ b/src/cpp/sequence_analysis/alignment.cpp @@ -0,0 +1,3009 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the alignment of 2 sequences. + * + * \param[in,out] os stream, + * \param[in] width column width, + * \param[in] ref_index reference sequence index, + * \param[in] test_index test sequence index, + * \param[in] alignment reference on the alignment, + * \param[in] alignment_index alignment index. + */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::alignment_ascii_print(ostream &os , int width , int ref_index , int test_index , + const Sequences &alignment , int alignment_index) const + +{ + int i , j , k , m , n; + int ref_rank , test_rank , alignment_rank; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + os << "\n" << SEQ_label[SEQL_SEQUENCE] << " " << identifier[test_index] + << " (" << SEQ_label[SEQL_LENGTH] << " " << length[test_index] << ") " << SEQ_label[SEQL_ALIGNED_ON] + << " " << SEQ_label[SEQL_SEQUENCE] << " " << identifier[ref_index] + << " (" << SEQ_label[SEQL_LENGTH] << " " << length[ref_index] << ")" << endl; + + ref_rank = 0; + test_rank = 0; + alignment_rank = 0; + + os << "\n"; + i = 0; + for (j = 0;j < alignment.length[alignment_index];j++) { + if ((alignment.int_sequence[alignment_index][0][j] != DELETION) && + (alignment.int_sequence[alignment_index][0][j] != BEGIN_END_DELETION)) { + if (index_parameter) { + os << setw(width) << index_parameter[test_index][i++]; + } + + else { + if (type[0] != REAL_VALUE) { + os << setw(width) << int_sequence[test_index][0][i++]; + } + else { + os << setw(width) << 
real_sequence[test_index][0][i++]; + } + } + } + + else if (alignment.int_sequence[alignment_index][0][j] == DELETION) { + os << setw(width) << "-"; + } + + else { + os << setw(width) << " "; + } + os << " "; + + if (((j - alignment_rank) * (width + 1) > LINE_NB_CHARACTER) || + (j == alignment.length[alignment_index] - 1)) { + if (j < alignment.length[alignment_index] - 1) { + os << "\\"; + } + os << endl; + + // test sequence + + for (k = (index_parameter ? 0 : 1);k < nb_variable;k++) { + m = test_rank; + for (n = alignment_rank;n <= j;n++) { + if ((alignment.int_sequence[alignment_index][0][n] != DELETION) && + (alignment.int_sequence[alignment_index][0][n] != BEGIN_END_DELETION)) { + if (type[k] != REAL_VALUE) { + os << setw(width) << int_sequence[test_index][k][m++]; + } + else { + os << setw(width) << real_sequence[test_index][k][m++]; + } + } + + else if (alignment.int_sequence[alignment_index][0][n] == DELETION) { + os << setw(width) << "-"; + } + + else { + os << setw(width) << " "; + } + os << " "; + } + + if (j < alignment.length[alignment_index] - 1) { + os << "\\"; + } + os << endl; + } + os << endl; + + if (j < alignment.length[alignment_index] - 1) { + test_rank = m; + } + + // edit operations + + for (k = alignment_rank;k <= j;k++) { + switch (alignment.int_sequence[alignment_index][0][k]) { + case DELETION : + os << setw(width) << "d"; + break; + case INSERTION : + os << setw(width) << "i"; + break; + case MATCH : + os << setw(width) << "|"; + break; + case SUBSTITUTION : + os << setw(width) << "s"; + break; + case TRANSPOSITION : + os << setw(width) << "t"; + break; + default : + os << setw(width) << " "; + break; + } + os << " "; + } + + if (j < alignment.length[alignment_index] - 1) { + os << "\\"; + } + os << endl; + + // reference sequence + + os << "\n"; + if (index_parameter) { + k = ref_rank; + for (m = alignment_rank;m <= j;m++) { + if ((alignment.int_sequence[alignment_index][0][m] != INSERTION) && + 
(alignment.int_sequence[alignment_index][0][m] != BEGIN_END_INSERTION)) { + os << setw(width) << index_parameter[ref_index][k++]; + } + else if (alignment.int_sequence[alignment_index][0][m] == INSERTION) { + os << setw(width) << "-"; + } + else { + os << setw(width) << " "; + } + os << " "; + } + + if (j < alignment.length[alignment_index] - 1) { + os << "\\"; + } + os << endl; + } + + for (k = 0;k < nb_variable;k++) { + m = ref_rank; + for (n = alignment_rank;n <= j;n++) { + if ((alignment.int_sequence[alignment_index][0][n] != INSERTION) && + (alignment.int_sequence[alignment_index][0][n] != BEGIN_END_INSERTION)) { + if (type[k] != REAL_VALUE) { + os << setw(width) << int_sequence[ref_index][k][m++]; + } + else { + os << setw(width) << real_sequence[ref_index][k][m++]; + } + } + + else if (alignment.int_sequence[alignment_index][0][n] == INSERTION) { + os << setw(width) << "-"; + } + + else { + os << setw(width) << " "; + } + os << " "; + } + + if (j < alignment.length[alignment_index] - 1) { + os << "\\"; + } + os << endl; + } + + if (j < alignment.length[alignment_index] - 1) { + alignment_rank = j + 1; + ref_rank = m; + } + os << endl; + } + } + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the alignment of 2 sequences at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] ref_index reference sequence index, + * \param[in] test_index test sequence index, + * \param[in] alignment reference on the alignment, + * \param[in] alignment_index alignment index. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::alignment_spreadsheet_print(ostream &os , int ref_index , int test_index , + const Sequences &alignment , int alignment_index) const + +{ + int i , j , k; + + + os << "\n" << SEQ_label[SEQL_SEQUENCE] << " " << identifier[test_index] + << "\t" << SEQ_label[SEQL_LENGTH] << " " << length[test_index] << "\t" << SEQ_label[SEQL_ALIGNED_ON] + << "\t" << SEQ_label[SEQL_SEQUENCE] << " " << identifier[ref_index] + << "\t" << SEQ_label[SEQL_LENGTH] << " " << length[ref_index] << endl; + + // test sequence + + os << "\n"; + if (index_parameter) { + i = 0; + for (j = 0;j < alignment.length[alignment_index];j++) { + if ((alignment.int_sequence[alignment_index][0][j] != DELETION) && + (alignment.int_sequence[alignment_index][0][j] != BEGIN_END_DELETION)) { + os << index_parameter[test_index][i++]; + } + else if (alignment.int_sequence[alignment_index][0][j] == DELETION) { + os << "-"; + } + os << "\t"; + } + os << endl; + } + + for (i = 0;i < nb_variable;i++) { + j = 0; + for (k = 0;k < alignment.length[alignment_index];k++) { + if ((alignment.int_sequence[alignment_index][0][k] != DELETION) && + (alignment.int_sequence[alignment_index][0][k] != BEGIN_END_DELETION)) { + if (type[i] != REAL_VALUE) { + os << int_sequence[test_index][i][j++]; + } + else { + os << real_sequence[test_index][i][j++]; + } + } + + else if (alignment.int_sequence[alignment_index][0][k] == DELETION) { + os << "-"; + } + os << "\t"; + } + os << endl; + } + os << endl; + + // edit operations + + for (i = 0;i < alignment.length[alignment_index];i++) { + switch (alignment.int_sequence[alignment_index][0][i]) { + case DELETION : + os << "d"; + break; + case INSERTION : + os << "i"; + break; + case MATCH : + os << "|"; + break; + case SUBSTITUTION : + os << "s"; + break; + case TRANSPOSITION : + os << "t"; + break; + default : + os << " "; + break; + } + os << "\t"; + } + os << endl; + + // reference sequence + + os 
<< "\n"; + if (index_parameter) { + i = 0; + for (j = 0;j < alignment.length[alignment_index];j++) { + if ((alignment.int_sequence[alignment_index][0][j] != INSERTION) && + (alignment.int_sequence[alignment_index][0][j] != BEGIN_END_INSERTION)) { + os << index_parameter[ref_index][i++]; + } + else if (alignment.int_sequence[alignment_index][0][j] == INSERTION) { + os << "-"; + } + os << "\t"; + } + os << endl; + } + + for (i = 0;i < nb_variable;i++) { + j = 0; + for (k = 0;k < alignment.length[alignment_index];k++) { + if ((alignment.int_sequence[alignment_index][0][k] != INSERTION) && + (alignment.int_sequence[alignment_index][0][k] != BEGIN_END_INSERTION)) { + if (type[i] != REAL_VALUE) { + os << int_sequence[ref_index][i][j++]; + } + else { + os << real_sequence[ref_index][i][j++]; + } + } + + else if (alignment.int_sequence[alignment_index][0][k] == INSERTION) { + os << "-"; + } + os << "\t"; + } + os << endl; + } + os << endl; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of a fixed insertion/deletion cost. + * + * \param[in] vector_dist reference on a VectorDistance object, + * \param[in] rank ranks (for ordinal variables), + * \param[in] max_category_distance maximum distances between categories. + * + * \return insertion/deletion cost. 
+ */ +/*--------------------------------------------------------------*/ + +double Sequences::indel_distance_computation(const VectorDistance &vector_dist , + double **rank , double **max_category_distance) const + +{ + int i , j; + double ldistance , distance = 0.; + + + for (i = 0;i < vector_dist.get_nb_variable();i++) { + switch (vector_dist.get_var_type(i)) { + + case NOMINAL : { + if (!max_category_distance[i]) { + ldistance = 1.; + } + + else { + ldistance = 0.; + for (j = (int)min_value[i];j <= (int)max_value[i];j++) { + if (max_category_distance[i][j] > ldistance) { + ldistance = max_category_distance[i][j]; + } + } + } + break; + } + + case ORDINAL : { + ldistance = rank[i][(int)max_value[i]] - rank[i][(int)min_value[i]]; + break; + } + + case NUMERIC : { + ldistance = max_value[i] - min_value[i]; + break; + } + + case CIRCULAR : { + ldistance = MIN(max_value[i] - min_value[i] , vector_dist.get_period(i) / 2.); + break; + } + } + + switch (vector_dist.get_distance_type()) { + case ABSOLUTE_VALUE : + distance += vector_dist.get_weight(i) * fabs(ldistance) / vector_dist.get_dispersion(i); + break; + case QUADRATIC : + distance += vector_dist.get_weight(i) * ldistance * ldistance / vector_dist.get_dispersion(i); + break; + } + } + + if (vector_dist.get_distance_type() == QUADRATIC) { + distance = sqrt(distance); + } + + return distance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the insertion/deletion cost of a vector. + * + * \param[in] vector_dist reference on a VectorDistance object, + * \param[in] index sequence index, + * \param[in] position position in the sequence, + * \param[in] rank ranks (for ordinal variables), + * \param[in] max_category_distance maximum distances between categories. + * + * \return insertion/deletion cost. 
+ */ +/*--------------------------------------------------------------*/ + +double Sequences::indel_distance_computation(const VectorDistance &vector_dist , + int index , int position , double **rank , + double **max_category_distance) const + +{ + int i; + double ldistance , distance = 0.; + + + for (i = 0;i < vector_dist.get_nb_variable();i++) { + switch (vector_dist.get_var_type(i)) { + + case NOMINAL : { + if (!max_category_distance[i]) { + ldistance = 1.; + } + else { + ldistance = max_category_distance[i][int_sequence[index][i][position]]; + } + break; + } + + case ORDINAL : { + ldistance = MAX(rank[i][int_sequence[index][i][position]] - rank[i][(int)min_value[i]] , + rank[i][(int)max_value[i]] - rank[i][int_sequence[index][i][position]]); + break; + } + + case NUMERIC : { + if (type[i] != REAL_VALUE) { + ldistance = MAX(int_sequence[index][i][position] - (int)min_value[i] , + (int)max_value[i] - int_sequence[index][i][position]); + } + else { + ldistance = MAX(real_sequence[index][i][position] - min_value[i] , + max_value[i] - real_sequence[index][i][position]); + } + break; + } + + case CIRCULAR : { + ldistance = MAX(int_sequence[index][i][position] - (int)min_value[i] , + (int)max_value[i] - int_sequence[index][i][position]); + if (ldistance > vector_dist.get_period(i) / 2.) { + ldistance = vector_dist.get_period(i) / 2.; + } + break; + } + } + + switch (vector_dist.get_distance_type()) { + case ABSOLUTE_VALUE : + distance += vector_dist.get_weight(i) * fabs(ldistance) / vector_dist.get_dispersion(i); + break; + case QUADRATIC : + distance += vector_dist.get_weight(i) * ldistance * ldistance / vector_dist.get_dispersion(i); + break; + } + } + + if (vector_dist.get_distance_type() == QUADRATIC) { + distance = sqrt(distance); + } + + return distance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the cost of substitution of one vector by another. 
+ * + * \param[in] vector_dist reference on a VectorDistance object, + * \param[in] ref_index reference sequence index, + * \param[in] test_index test sequence index, + * \param[in] ref_position position in the reference sequence, + * \param[in] test_position position in the test sequence, + * \param[in] rank ranks (for ordinal variables), + * \param[in] test_seq pointer on the test sequences (multiple alignment). + * + * \return substitution cost. + */ +/*--------------------------------------------------------------*/ + +double Sequences::substitution_distance_computation(const VectorDistance &vector_dist , + int ref_index , int test_index , + int ref_position , int test_position , + double **rank , const Sequences *test_seq) const + +{ + int i; + double ldistance , distance = 0.; + + + if (!test_seq) { + test_seq = this; + } + + for (i = 0;i < vector_dist.get_nb_variable();i++) { + switch (vector_dist.get_var_type(i)) { + + case NOMINAL : { + if (!vector_dist.get_category_distance(i)) { + ldistance = (int_sequence[ref_index][i][ref_position] == test_seq->int_sequence[test_index][i][test_position] ? 0. 
: 1.); + } + else { + ldistance = vector_dist.get_category_distance(i , int_sequence[ref_index][i][ref_position] , + test_seq->int_sequence[test_index][i][test_position]); + } + break; + } + + case ORDINAL : { + ldistance = rank[i][int_sequence[ref_index][i][ref_position]] - rank[i][test_seq->int_sequence[test_index][i][test_position]]; + break; + } + + case NUMERIC : { + if (type[i] != REAL_VALUE) { + ldistance = int_sequence[ref_index][i][ref_position] - test_seq->int_sequence[test_index][i][test_position]; + } + else { + ldistance = real_sequence[ref_index][i][ref_position] - test_seq->real_sequence[test_index][i][test_position]; + } + break; + } + + case CIRCULAR : { + if (int_sequence[ref_index][i][ref_position] <= test_seq->int_sequence[test_index][i][test_position]) { + ldistance = MIN(test_seq->int_sequence[test_index][i][test_position] - int_sequence[ref_index][i][ref_position] , + int_sequence[ref_index][i][ref_position] + vector_dist.get_period(i) - + test_seq->int_sequence[test_index][i][test_position]); + } + else { + ldistance = MIN(int_sequence[ref_index][i][ref_position] - test_seq->int_sequence[test_index][i][test_position] , + test_seq->int_sequence[test_index][i][test_position] + vector_dist.get_period(i) - + int_sequence[ref_index][i][ref_position]); + } + break; + } + } + + switch (vector_dist.get_distance_type()) { + case ABSOLUTE_VALUE : + distance += vector_dist.get_weight(i) * fabs(ldistance) / vector_dist.get_dispersion(i); + break; + case QUADRATIC : + distance += vector_dist.get_weight(i) * ldistance * ldistance / vector_dist.get_dispersion(i); + break; + } + } + + if (vector_dist.get_distance_type() == QUADRATIC) { + distance = sqrt(distance); + } + + return distance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Alignment of sequences. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the alignments, + * \param[in] ivector_dist reference on a VectorDistance object, + * \param[in] ref_identifier reference sequence identifier, + * \param[in] test_identifier test sequence identifier, + * \param[in] begin_free flag begin-free alignment, + * \param[in] end_free flag end-free alignment, + * \param[in] indel_cost insertion/deletion costs adaptative or fixed, + * \param[in] indel_factor factor for deducing the insertion/deletion costs, + * \param[in] transposition_flag flag transposition, + * \param[in] transposition_factor factor for deducing the transposition costs, + * \param[in] result_path result file path, + * \param[in] result_format result file format (ASCII/SPREADSHEET), + * \param[in] alignment_path alignment file path. + * + * \return DistanceMatrix object. + */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* Sequences::alignment(StatError &error , ostream *os , const VectorDistance &ivector_dist , + int ref_identifier , int test_identifier , bool begin_free , + bool end_free , insertion_deletion_cost indel_cost , double indel_factor , + bool transposition_flag , double transposition_factor , + const string result_path , output_format result_format , + const string alignment_path) const + +{ + bool status = true , half_matrix; + int i , j , k , m; + int nb_alignment , ilength , alignment_index , var , width , ref_position , pref_position , + test_position , ptest_position , gap_length , max_gap_length , nb_deletion , nb_insertion , + nb_match , nb_substitution , nb_transposition , nb_begin_end , offset , *palignment , + *calignment , *category , **path_length , ***back_pointers; + double buff , deletion_distance , insertion_distance , substitution_distance , + transposition_distance , max_transposition_cost , **rank , **max_category_distance , + **local_indel_distance , **local_substitution_distance , 
**cumul_distance; + VectorDistance *vector_dist; + DistanceMatrix *dist_matrix; + Sequences *alignment; + ofstream *out_file; + + + dist_matrix = NULL; + error.init(); + + if (nb_sequence < 2) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + if (((index_param_type == TIME) && (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + if (ivector_dist.get_nb_variable() != nb_variable) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + + else { + for (i = 0;i < nb_variable;i++) { + if (ivector_dist.get_var_type(i) != NUMERIC) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + if ((ivector_dist.get_var_type(i) == NOMINAL) && + ((min_value[i] < 0) || (max_value[i] >= NB_CATEGORY) || + ((ivector_dist.get_category_distance(i)) && (ivector_dist.get_nb_value(i) != max_value[i] + 1)))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_CATEGORY]; + error.update((error_message.str()).c_str()); + } + + if ((ivector_dist.get_var_type(i) == CIRCULAR) && + (max_value[i] - min_value[i] >= ivector_dist.get_period(i))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] 
<< " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE_PERIOD]; + error.update((error_message.str()).c_str()); + } + } + + else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + } + + if (ref_identifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (ref_identifier == identifier[i]) { + break; + } + } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_REF_SEQUENCE_IDENTIFIER]); + } + } + + if (test_identifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (test_identifier == identifier[i]) { + break; + } + } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_TEST_SEQUENCE_IDENTIFIER]); + } + } + + if ((ref_identifier != I_DEFAULT) && (test_identifier != I_DEFAULT) && (ref_identifier == test_identifier)) { + status = false; + error.correction_update(SEQ_error[SEQR_SEQUENCE_IDENTIFIERS] , "different"); + } + + nb_alignment = 1; + if (ref_identifier == I_DEFAULT) { + nb_alignment *= (nb_sequence - 1); + } + if (test_identifier == I_DEFAULT) { + nb_alignment *= (ref_identifier != I_DEFAULT ? nb_sequence - 1 : nb_sequence); + } + + if (nb_alignment > NB_ALIGNMENT) { + status = false; + error.update(SEQ_error[SEQR_NB_ALIGNMENT]); + } + + if (indel_factor <= 0.5) { + status = false; + error.update(SEQ_error[SEQR_INDEL_FACTOR]); + } + + if ((transposition_factor < 0.) 
|| (transposition_factor >= 2.)) { + status = false; + error.update(SEQ_error[SEQR_TRANSPOSITION_FACTOR]); + } + + if (status) { + if ((ref_identifier == I_DEFAULT) && (test_identifier == I_DEFAULT) && ((result_path.empty()) || + (nb_alignment > FILE_NB_ALIGNMENT)) && (alignment_path.empty())) { + half_matrix = true; + } + else { + half_matrix = false; + } + + vector_dist = new VectorDistance(ivector_dist); + + // computation of the maximum substitution distance for nominal variables and + // the ranks for ordinal variables + + rank = new double*[nb_variable]; + max_category_distance = new double*[nb_variable]; + + for (i = 0;i < nb_variable;i++) { + if ((vector_dist->get_var_type(i) == NOMINAL) && (vector_dist->get_category_distance(i))) { + max_category_distance[i] = vector_dist->max_category_distance_computation(i); + } + else { + max_category_distance[i] = 0; + } + + if (vector_dist->get_var_type(i) == ORDINAL) { + rank[i] = marginal_distribution[i]->rank_computation(); + } + else { + rank[i] = NULL; + } + + // computation of dispersion measures for the standardization of variables + + if (marginal_distribution[i]) { + vector_dist->dispersion_computation(i , marginal_distribution[i] , rank[i]); + } + + else { + switch (vector_dist->get_distance_type()) { + case ABSOLUTE_VALUE : + vector_dist->dispersion_update(i , mean_absolute_difference_computation(i)); + break; + case QUADRATIC : + vector_dist->dispersion_update(i , 2 * variance_computation(i , mean_computation(i))); + break; + } + + if (vector_dist->get_dispersion(i) == 0.) 
{ + vector_dist->dispersion_update(i , 1.); + } + } + } + +# ifdef DEBUG + cout << *vector_dist; + if (vector_dist->get_distance_type() == ABSOLUTE_VALUE) { + for (i = 0;i < nb_variable;i++) { + if (vector_dist->get_var_type(i) == NUMERIC) { + cout << "\n" << STAT_label[STATL_VARIABLE] << " " << i << " mean absolute difference: " + << mean_absolute_difference_computation(i) << endl; + } + } + } +# endif + + if (index_parameter) { + width = column_width(index_parameter_distribution->nb_value - 1); + } + else { + width = 0; + } + + for (i = 0;i < nb_variable;i++) { + if (type[i] != REAL_VALUE) { + var = column_width((int)min_value[i] , (int)max_value[i]); + if (var > width) { + width = var; + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((ref_identifier == I_DEFAULT) || (ref_identifier == identifier[j]) || + (test_identifier == I_DEFAULT) || (test_identifier == identifier[j])) { + var = column_width(length[j] , real_sequence[j][i]); + if (var > width) { + width = var; + } + } + } + } + } + + out_file = NULL; + + if (!result_path.empty()) { + out_file = new ofstream(result_path.c_str()); + + if (!out_file) { + error.update(STAT_error[STATR_FILE_NAME]); + if (os) { + *os << error; + } + } + } + + // construction of the result data structures + + dist_matrix = new DistanceMatrix(nb_sequence , ref_identifier , test_identifier , + SEQ_label[SEQL_SEQUENCE] , identifier , + true , transposition_flag); + + if (!alignment_path.empty()) { + alignment = new Sequences(nb_alignment , 1); + } + else { + ilength = max_length + max_length; + alignment = new Sequences(1 , NULL , &ilength , 1 , false); + } + + // construction of the algorithm data structures - computation of the insertion/deletion costs + + if (indel_cost == FIXED) { + buff = indel_distance_computation(*vector_dist , rank , max_category_distance) * indel_factor; + } + + local_indel_distance = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + if ((ref_identifier == I_DEFAULT) || 
(ref_identifier == identifier[i]) || + (test_identifier == I_DEFAULT) || (test_identifier == identifier[i])) { + local_indel_distance[i] = new double[length[i] + 1]; + + switch (indel_cost) { + + case ADAPTATIVE : { + for (j = 1;j <= length[i];j++) { + local_indel_distance[i][j] = indel_distance_computation(*vector_dist , i , j - 1 , rank , max_category_distance) * indel_factor; + } + +# ifdef DEBUG +/* cout << "\ninsertion/deletion cost of the vectors of the sequence " << i << ": "; + for (j = 1;j <= length[i];j++) { + cout << local_indel_distance[i][j] << " "; + } + cout << endl; */ +# endif + + break; + } + + case FIXED : { + for (j = 1;j <= length[i];j++) { + local_indel_distance[i][j] = buff; + } + break; + } + } + } + + else { + local_indel_distance[i] = NULL; + } + } + + local_substitution_distance = new double*[max_length + 1]; + local_substitution_distance[0] = NULL; + for (i = 1;i <= max_length;i++) { + local_substitution_distance[i] = new double[max_length + 1]; + } + + cumul_distance = new double*[max_length + 1]; + for (i = 0;i <= max_length;i++) { + cumul_distance[i] = new double[max_length + 1]; + } + + path_length = new int*[max_length + 1]; + for (i = 0;i <= max_length;i++) { + path_length[i] = new int[max_length + 1]; + } + + back_pointers = new int**[max_length + 1]; + for (i = 0;i <= max_length;i++) { + back_pointers[i] = new int*[max_length + 1]; + for (j = 0;j <= max_length;j++) { + back_pointers[i][j] = new int[2]; + } + } + + // alignment of sequences + +# ifdef DEBUG + int nb_local_substitution_distance = 0; + double mean_local_substitution_distance = 0.; +# endif + + alignment_index = 0; + + for (i = 0;i < nb_sequence;i++) { + if ((ref_identifier == I_DEFAULT) || (ref_identifier == identifier[i])) { + for (j = (half_matrix ? 
i + 1 : 0);j < nb_sequence;j++) { + if (((test_identifier == I_DEFAULT) || (test_identifier == identifier[j])) && (j != i)) { + + // initialization of the cumulative distances and the corresponding alignment lengths + + cumul_distance[0][0] = 0.; + path_length[0][0] = 0; + + for (k = 1;k <= length[i];k++) { + + // deletion + + if (begin_free) { + cumul_distance[k][0] = cumul_distance[k - 1][0]; + } + else { + cumul_distance[k][0] = cumul_distance[k - 1][0] + local_indel_distance[i][k]; + } + + path_length[k][0] = k; + back_pointers[k][0][0] = k - 1; + back_pointers[k][0][1] = 0; + } + + for (k = 1;k <= length[j];k++) { + + // insertion + + if (begin_free) { + cumul_distance[0][k] = cumul_distance[0][k - 1]; + } + else { + cumul_distance[0][k] = cumul_distance[0][k - 1] + local_indel_distance[j][k]; + } + + path_length[0][k] = k; + back_pointers[0][k][0] = 0; + back_pointers[0][k][1] = k - 1; + } + + // computation of the cumulative distances and the corresponding alignment lengths + + for (k = 1;k <= length[i];k++) { + for (m = 1;m <= length[j];m++) { + + // computation of the distance of substitution of one vector by another + + local_substitution_distance[k][m] = substitution_distance_computation(*vector_dist , i , j , k - 1 , m - 1 , rank); + +# ifdef DEBUG + nb_local_substitution_distance++; + mean_local_substitution_distance += local_substitution_distance[k][m]; +# endif + + // match/substitution + + cumul_distance[k][m] = cumul_distance[k - 1][m - 1] + local_substitution_distance[k][m]; + path_length[k][m] = path_length[k - 1][m - 1] + 1; + back_pointers[k][m][0] = k - 1; + back_pointers[k][m][1] = m - 1; + + // deletion + + if ((m < length[j]) || (!end_free)) { + buff = cumul_distance[k - 1][m] + local_indel_distance[i][k]; + } + else { + buff = cumul_distance[k - 1][m]; + } + + if (buff < cumul_distance[k][m]) { + cumul_distance[k][m] = buff; + path_length[k][m] = path_length[k - 1][m] + 1; + back_pointers[k][m][0] = k - 1; + back_pointers[k][m][1] = m; + } 
+ + // insertion + + if ((k < length[i]) || (!end_free)) { + buff = cumul_distance[k][m - 1] + local_indel_distance[j][m]; + } + else { + buff = cumul_distance[k][m - 1]; + } + + if (buff < cumul_distance[k][m]) { + cumul_distance[k][m] = buff; + path_length[k][m] = path_length[k][m - 1] + 1; + back_pointers[k][m][0] = k; + back_pointers[k][m][1] = m - 1; + } + + // transposition + + if ((transposition_flag) && (k > 1) && (m > 1) && + (local_substitution_distance[k][m] > 0.) && + (local_substitution_distance[k - 1][m] == 0.) && + (local_substitution_distance[k][m - 1] == 0.)) { + max_transposition_cost = local_substitution_distance[k][m]; + if (local_indel_distance[i][k - 1] < max_transposition_cost) { + max_transposition_cost = local_indel_distance[i][k - 1]; + } + if (local_indel_distance[i][k] < max_transposition_cost) { + max_transposition_cost = local_indel_distance[i][k]; + } + buff = cumul_distance[k - 2][m - 2] + max_transposition_cost * transposition_factor; + + if (buff < cumul_distance[k][m]) { + cumul_distance[k][m] = buff; + path_length[k][m] = path_length[k - 2][m - 2] + 2; + back_pointers[k][m][0] = k - 2; + back_pointers[k][m][1] = m - 2; + } + } + } + } + + // end free (alternative implementation) + +/* if (end_free) { + buff = cumul_distance[length[i]][length[j]]; + + k = length[i]; + for (m = 1;m < length[i];m++) { + if (cumul_distance[m][length[j]] < buff) { + buff = cumul_distance[m][length[j]]; + k = m; + } + } + for (m = k + 1;m <= length[i];m++) { + cumul_distance[m][length[j]] = buff; + path_length[m][length[j]] = path_length[m - 1][length[j]] + 1; + back_pointers[m][length[j]][0] = m - 1; + back_pointers[m][length[j]][1] = length[j]; + } + + k = length[j]; + for (m = 1;m < length[j];m++) { + if (cumul_distance[length[i]][m] < buff) { + buff = cumul_distance[length[i]][m]; + k = m; + } + } + for (m = k + 1;m <= length[j];m++) { + cumul_distance[length[i]][m] = buff; + path_length[length[i]][m] = path_length[length[i]][m - 1] + 1; + 
back_pointers[length[i]][m][0] = length[i]; + back_pointers[length[i]][m][1] = m - 1; + } + } */ + +# ifdef DEBUG +/* cout << "\n"; + for (k = length[i];k >= 0;k--) { + for (m = 0;m <= length[j];m++) { + cout << cumul_distance[k][m] << " "; + } + cout << endl; + } + cout << endl; */ +# endif + + alignment->length[alignment_index] = path_length[length[i]][length[j]]; + if (!alignment_path.empty()) { + alignment->int_sequence[alignment_index][0] = new int[alignment->length[alignment_index]]; + } + + // backtracking + + deletion_distance = 0.; + insertion_distance = 0.; + substitution_distance = 0.; + transposition_distance = 0.; + + palignment = alignment->int_sequence[alignment_index][0] + alignment->length[alignment_index]; + k = path_length[length[i]][length[j]]; + pref_position = length[i]; + ptest_position = length[j]; + +# ifdef DEBUG +// cout << pref_position << " " << ptest_position << endl; +# endif + + do { + ref_position = pref_position; + test_position = ptest_position; + pref_position = back_pointers[ref_position][test_position][0]; + ptest_position = back_pointers[ref_position][test_position][1]; + +# ifdef DEBUG +// cout << pref_position << " " << ptest_position << endl; +# endif + + if (test_position == ptest_position) { + if (((test_position > 0) || (!begin_free)) && ((test_position < length[j]) || (!end_free))) { + *--palignment = DELETION; + deletion_distance += local_indel_distance[i][ref_position]; + } + else { + *--palignment = BEGIN_END_DELETION; + } + k--; + } + + else if (ref_position == pref_position) { + if (((ref_position > 0) || (!begin_free)) && ((ref_position < length[i]) || (!end_free))) { + *--palignment = INSERTION; + insertion_distance += local_indel_distance[j][test_position]; + } + else { + *--palignment = BEGIN_END_INSERTION; + } + k--; + } + + else if ((ref_position == pref_position + 1) && (test_position == ptest_position + 1)) { + if (local_substitution_distance[ref_position][test_position] == 0.) 
{ + *--palignment = MATCH; + } + else { + *--palignment = SUBSTITUTION; + substitution_distance += local_substitution_distance[ref_position][test_position]; + } + k--; + } + + else if ((ref_position == pref_position + 2) && (test_position == ptest_position + 2)) { + *--palignment = TRANSPOSITION; + *--palignment = TRANSPOSITION; + transposition_distance += local_substitution_distance[ref_position][test_position] * + transposition_factor; + k -= 2; + } + } + while (k > 0); + + // search for the maximum number of successive insertions/deletions + + palignment = alignment->int_sequence[alignment_index][0]; + max_gap_length = 0; + gap_length = 0; + + if ((*palignment == DELETION) || (*palignment == INSERTION)) { + gap_length++; + } + + for (k = 1;k < alignment->length[alignment_index];k++) { + if (*(palignment + 1) != *palignment) { + if (((*palignment == DELETION) || (*palignment == INSERTION)) && + (gap_length > max_gap_length)) { + max_gap_length = gap_length; + } + gap_length = 0; + } + + palignment++; + if ((*palignment == DELETION) || (*palignment == INSERTION)) { + gap_length++; + } + } + + if (((*palignment == DELETION) || (*palignment == INSERTION)) && + (gap_length > max_gap_length)) { + max_gap_length = gap_length; + } + + // update of the numbers of deletions, insertions, matchs, substitutions and transpositions + + palignment = alignment->int_sequence[alignment_index][0]; + + nb_deletion = 0; + nb_insertion = 0; + nb_match = 0; + nb_substitution = 0; + nb_transposition = 0; + nb_begin_end = 0; + + for (k = 0;k < alignment->length[alignment_index];k++) { + switch (*palignment++) { + case DELETION : + nb_deletion++; + break; + case INSERTION : + nb_insertion++; + break; + case MATCH : + nb_match++; + break; + case SUBSTITUTION : + nb_substitution++; + break; + case TRANSPOSITION : + nb_transposition++; + break; + default : + nb_begin_end++; + break; + } + } + + dist_matrix->update(identifier[i] , identifier[j] , cumul_distance[length[i]][length[j]] , + 
length[i] + length[j] - nb_begin_end , deletion_distance , nb_deletion , + insertion_distance , nb_insertion , nb_match , + substitution_distance , nb_substitution , + transposition_distance , nb_transposition); + + if (half_matrix) { + dist_matrix->update(identifier[j] , identifier[i] , cumul_distance[length[i]][length[j]] , + length[i] + length[j] - nb_begin_end , insertion_distance , nb_insertion , + deletion_distance , nb_deletion , nb_match , + substitution_distance , nb_substitution , + transposition_distance , nb_transposition); + } + +# ifdef DEBUG + { + double sum = deletion_distance + insertion_distance + substitution_distance + + transposition_distance; + if ((sum < cumul_distance[length[i]][length[j]] - DOUBLE_ERROR) || + (sum > cumul_distance[length[i]][length[j]] + DOUBLE_ERROR)) { + cout << "\nERROR: " << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << " aligned on " + << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " << cumul_distance[length[i]][length[j]] + << " | " << sum << endl; + } + } +# endif + + // writing of the alignment + + if ((os) && (nb_alignment <= DISPLAY_NB_ALIGNMENT)) { + alignment_ascii_print(*os , width , i , j , *alignment , alignment_index); + + if (length[i] + length[j] - nb_begin_end > 0) { + *os << STAT_label[STATL_DISTANCE] << " (" << SEQ_label[SEQL_ALIGNMENT_LENGTH] + << "): " << cumul_distance[length[i]][length[j]] / (length[i] + length[j] - nb_begin_end) + << " (" << path_length[length[i]][length[j]] - nb_begin_end << ") = " + << deletion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_deletion << " d) + " + << insertion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_insertion << " i) + 0 (" + << nb_match << " m) + " << substitution_distance / (length[i] + length[j] - nb_begin_end) + << " (" << nb_substitution << " s)"; + if (transposition_flag) { + *os << " + " << transposition_distance / (length[i] + length[j] - nb_begin_end) + << " (" << nb_transposition << " t)"; + } + *os << endl; + + 
*os << SEQ_label[SEQL_MAX_GAP_LENGTH] << ": " << max_gap_length << endl; + } + } + + if ((out_file) && (nb_alignment <= FILE_NB_ALIGNMENT)) { + switch (result_format) { + + case ASCII : { + alignment_ascii_print(*out_file , width , i , j , *alignment , alignment_index); + + if (length[i] + length[j] - nb_begin_end > 0) { + *out_file << STAT_label[STATL_DISTANCE] << " (" << SEQ_label[SEQL_ALIGNMENT_LENGTH] + << "): " << cumul_distance[length[i]][length[j]] / (length[i] + length[j] - nb_begin_end) + << " (" << path_length[length[i]][length[j]] - nb_begin_end << ") = " + << deletion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_deletion << " d) + " + << insertion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_insertion << " i) + 0 (" + << nb_match << " m) + " << substitution_distance / (length[i] + length[j] - nb_begin_end) + << " (" << nb_substitution << " s)"; + if (transposition_flag) { + *out_file << " + " << transposition_distance / (length[i] + length[j] - nb_begin_end) + << " (" << nb_transposition << " t)"; + } + *out_file << endl; + + *out_file << SEQ_label[SEQL_MAX_GAP_LENGTH] << ": " << max_gap_length << endl; + } + break; + } + + case SPREADSHEET : { + alignment_spreadsheet_print(*out_file , i , j , *alignment , alignment_index); + + if (length[i] + length[j] - nb_begin_end > 0) { + *out_file << STAT_label[STATL_DISTANCE] << " (" << SEQ_label[SEQL_ALIGNMENT_LENGTH] + << ")\t" << cumul_distance[length[i]][length[j]] / (length[i] + length[j] - nb_begin_end) + << " (" << path_length[length[i]][length[j]] - nb_begin_end << ")\t" + << deletion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_deletion << " d)\t" + << insertion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_insertion << " i)\t0 (" + << nb_match << " m)\t" << substitution_distance / (length[i] + length[j] - nb_begin_end) + << " (" << nb_substitution << " s)"; + if (transposition_flag) { + *out_file << "\t" << transposition_distance 
/ (length[i] + length[j] - nb_begin_end) + << " (" << nb_transposition << " t)"; + } + *out_file << endl; + + *out_file << SEQ_label[SEQL_MAX_GAP_LENGTH] << "\t" << max_gap_length << endl; + } + break; + } + } + } + + if (!alignment_path.empty()) { + alignment_index++; + } + } + } + } + } + +# ifdef DEBUG + cout << "\nlocal distance: " + << mean_local_substitution_distance / nb_local_substitution_distance << endl; +# endif + + if ((ref_identifier == I_DEFAULT) || (test_identifier == I_DEFAULT)) { + + // writing of distances, alignment lengths, and numbers of deletions, insertions, + // matchs, substitutions and transpositions + + if ((os) && (nb_alignment <= DISPLAY_NB_ALIGNMENT)) { + *os << "\n"; + dist_matrix->ascii_write(*os); + } + + if (out_file) { + *out_file << "\n"; + + switch (result_format) { + case ASCII : + dist_matrix->ascii_write(*out_file); + break; + case SPREADSHEET : + dist_matrix->spreadsheet_write(*out_file); + break; + } + } + } + + if (!alignment_path.empty()) { + + // grouping of insertions/deletions and removing of insertions/deletions corresponding to + // begin of begin-free alignment or end of end-free alignment + + category = new int[(transposition_flag ? 
TRANSPOSITION : SUBSTITUTION) + 1]; + category[DELETION] = 0; + category[INSERTION] = 0; + category[MATCH] = 1; + category[SUBSTITUTION] = 2; + if (transposition_flag) { + category[TRANSPOSITION] = 3; + } + + if (os) { + *os << "\n" << SEQ_label[SEQL_ALIGNMENT_CODING] << "\n" << STAT_label[STATL_INDEL] << ": " << 0 + << "\n" << STAT_label[STATL_MATCH] << ": " << 1 << "\n" << STAT_label[STATL_SUBSTITUTION] << ": " << 2; + if (transposition_flag) { + *os << "\n" << STAT_label[STATL_TRANSPOSITION] << ": " << 3; + } + *os << endl; + } + + for (i = 0;i < alignment->nb_sequence;i++) { + calignment = alignment->int_sequence[i][0]; + offset = 0; + while ((*calignment == BEGIN_END_DELETION) || (*calignment == BEGIN_END_INSERTION)) { + offset++; + calignment++; + } + palignment = alignment->int_sequence[i][0]; + for (j = offset;j < alignment->length[i];j++) { + if ((*calignment == BEGIN_END_DELETION) || (*calignment == BEGIN_END_INSERTION)) { + break; + } + *palignment++ = category[*calignment++]; + } + alignment->length[i] = j - offset; + } + + delete [] category; + + alignment->min_value_computation(0); + alignment->max_value_computation(0); + alignment->build_marginal_frequency_distribution(0); + + alignment->max_length_computation(); + alignment->cumul_length_computation(); + alignment->build_length_frequency_distribution(); + + // writing of alignment sequences + + status = alignment->ascii_data_write(error , alignment_path); + if ((!status) && (os)) { + *os << error; + } + } + + if (out_file) { + out_file->close(); + delete out_file; + } + + delete vector_dist; + + for (i = 0;i < nb_variable;i++) { + delete [] rank[i]; + delete [] max_category_distance[i]; + } + delete [] rank; + delete [] max_category_distance; + + delete alignment; + + for (i = 0;i < nb_sequence;i++) { + if ((ref_identifier == I_DEFAULT) || (ref_identifier == identifier[i]) || + (test_identifier == I_DEFAULT) || (test_identifier == identifier[i])) { + delete [] local_indel_distance[i]; + } + } + 
delete [] local_indel_distance; + + for (i = 1;i <= max_length;i++) { + delete [] local_substitution_distance[i]; + } + delete [] local_substitution_distance; + + for (i = 0;i <= max_length;i++) { + delete [] cumul_distance[i]; + } + delete [] cumul_distance; + + for (i = 0;i <= max_length;i++) { + delete [] path_length[i]; + } + delete [] path_length; + + for (i = 0;i <= max_length;i++) { + for (j = 0;j <= max_length;j++) { + delete [] back_pointers[i][j]; + } + delete [] back_pointers[i]; + } + delete [] back_pointers; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of a fixed substitution cost between 2 different vectors. + * + * \param[in] ref_index reference sequence index, + * \param[in] test_index test sequence index, + * \param[in] ref_position position in the reference sequence, + * \param[in] test_position position in the test sequence, + * \param[in] substitution_distance substitution cost. + * + * \return substitution cost. + */ +/*--------------------------------------------------------------*/ + +double Sequences::substitution_distance_computation(int ref_index , int test_index , int ref_position , + int test_position , double substitution_distance) const + +{ + int i; + double distance = 0.; + + + for (i = 0;i < nb_variable;i++) { + if (int_sequence[ref_index][i][ref_position] != int_sequence[test_index][i][test_position]) { + distance = substitution_distance; + break; + } + } + + return distance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Alignment of sequences. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the alignments, + * \param[in] ref_identifier reference sequence identifier, + * \param[in] test_identifier test sequence identifier, + * \param[in] begin_free flag begin-free alignment, + * \param[in] end_free flag end-free alignment, + * \param[in] result_path result file path, + * \param[in] result_format result file format (ASCII/SPREADSHEET), + * \param[in] alignment_path alignment file path. + * + * \return DistanceMatrix object. + */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* Sequences::alignment(StatError &error , ostream *os , int ref_identifier , + int test_identifier , bool begin_free , bool end_free , + const string result_path , output_format result_format , + const string alignment_path) const + +{ + bool status = true , half_matrix; + int i , j , k , m; + int nb_alignment , ilength , alignment_index , var , width , ref_position , + pref_position , test_position , ptest_position , gap_length , max_gap_length , + nb_deletion , nb_insertion , nb_match , nb_begin_end , offset , *palignment , + *calignment , *category , **path_length , ***back_pointers; + double buff , deletion_distance , insertion_distance , substitution_distance , + **cumul_distance; + DistanceMatrix *dist_matrix; + Sequences *alignment; + ofstream *out_file; + + + dist_matrix = NULL; + error.init(); + + if (nb_sequence < 2) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + if (((index_param_type == TIME) && (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + 
correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (ref_identifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (ref_identifier == identifier[i]) { + break; + } + } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_REF_SEQUENCE_IDENTIFIER]); + } + } + + if (test_identifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (test_identifier == identifier[i]) { + break; + } + } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_TEST_SEQUENCE_IDENTIFIER]); + } + } + + if ((ref_identifier != I_DEFAULT) && (test_identifier != I_DEFAULT) && (ref_identifier == test_identifier)) { + status = false; + error.correction_update(SEQ_error[SEQR_SEQUENCE_IDENTIFIERS] , "different"); + } + + nb_alignment = 1; + if (ref_identifier == I_DEFAULT) { + nb_alignment *= (nb_sequence - 1); + } + if (test_identifier == I_DEFAULT) { + nb_alignment *= (ref_identifier != I_DEFAULT ? 
nb_sequence - 1 : nb_sequence); + } + + if (nb_alignment > NB_ALIGNMENT) { + status = false; + error.update(SEQ_error[SEQR_NB_ALIGNMENT]); + } + + if (status) { + if ((ref_identifier == I_DEFAULT) && (test_identifier == I_DEFAULT) && ((result_path.empty()) || + (nb_alignment > FILE_NB_ALIGNMENT)) && (alignment_path.empty())) { + half_matrix = true; + } + else { + half_matrix = false; + } + + substitution_distance = INDEL_DISTANCE * 2.1; + + if (index_parameter) { + width = column_width(index_parameter_distribution->nb_value - 1); + } + else { + width = 0; + } + for (i = 0;i < nb_variable;i++) { + var = column_width((int)min_value[i] , (int)max_value[i]); + if (var > width) { + width = var; + } + } + + out_file = NULL; + + if (!result_path.empty()) { + out_file = new ofstream(result_path.c_str()); + + if (!out_file) { + error.update(STAT_error[STATR_FILE_NAME]); + if (os) { + *os << error; + } + } + } + + // construction of the result data structures + + dist_matrix = new DistanceMatrix(nb_sequence , ref_identifier , test_identifier , + SEQ_label[SEQL_SEQUENCE] , identifier , false); + + if (!alignment_path.empty()) { + alignment = new Sequences(nb_alignment , 1); + } + else { + ilength = max_length + max_length; + alignment = new Sequences(1 , NULL , &ilength , 1 , false); + } + + // construction of the algorithm data structures + + cumul_distance = new double*[max_length + 1]; + for (i = 0;i <= max_length;i++) { + cumul_distance[i] = new double[max_length + 1]; + } + + path_length = new int*[max_length + 1]; + for (i = 0;i <= max_length;i++) { + path_length[i] = new int[max_length + 1]; + } + + back_pointers = new int**[max_length + 1]; + for (i = 0;i <= max_length;i++) { + back_pointers[i] = new int*[max_length + 1]; + for (j = 0;j <= max_length;j++) { + back_pointers[i][j] = new int[2]; + } + } + + // alignment of sequences + + alignment_index = 0; + + for (i = 0;i < nb_sequence;i++) { + if ((ref_identifier == I_DEFAULT) || (ref_identifier == identifier[i])) { + 
for (j = (half_matrix ? i + 1 : 0);j < nb_sequence;j++) { + if (((test_identifier == I_DEFAULT) || (test_identifier == identifier[j])) && (j != i)) { + + // initialization of the cumulative distances and the corresponding alignment lengths + + cumul_distance[0][0] = 0.; + path_length[0][0] = 0; + + for (k = 1;k <= length[i];k++) { + + // deletion + + if (begin_free) { + cumul_distance[k][0] = cumul_distance[k - 1][0]; + } + else { + cumul_distance[k][0] = cumul_distance[k - 1][0] + INDEL_DISTANCE; + } + + path_length[k][0] = k; + back_pointers[k][0][0] = k - 1; + back_pointers[k][0][1] = 0; + } + + for (k = 1;k <= length[j];k++) { + + // insertion + + if (begin_free) { + cumul_distance[0][k] = cumul_distance[0][k - 1]; + } + else { + cumul_distance[0][k] = cumul_distance[0][k - 1] + INDEL_DISTANCE; + } + + path_length[0][k] = k; + back_pointers[0][k][0] = 0; + back_pointers[0][k][1] = k - 1; + } + + // computation of the cumulative distances and the corresponding alignment lengths + + for (k = 1;k <= length[i];k++) { + for (m = 1;m <= length[j];m++) { + + // match/substitution + + cumul_distance[k][m] = cumul_distance[k - 1][m - 1] + + substitution_distance_computation(i , j , k - 1 , m - 1 , substitution_distance); + path_length[k][m] = path_length[k - 1][m - 1] + 1; + back_pointers[k][m][0] = k - 1; + back_pointers[k][m][1] = m - 1; + + // deletion + + if ((m < length[j]) || (!end_free)) { + buff = cumul_distance[k - 1][m] + INDEL_DISTANCE; + } + else { + buff = cumul_distance[k - 1][m]; + } + + if (buff < cumul_distance[k][m]) { + cumul_distance[k][m] = buff; + path_length[k][m] = path_length[k - 1][m] + 1; + back_pointers[k][m][0] = k - 1; + back_pointers[k][m][1] = m; + } + + // insertion + + if ((k < length[i]) || (!end_free)) { + buff = cumul_distance[k][m - 1] + INDEL_DISTANCE; + } + else { + buff = cumul_distance[k][m - 1]; + } + + if (buff < cumul_distance[k][m]) { + cumul_distance[k][m] = buff; + path_length[k][m] = path_length[k][m - 1] + 1; + 
back_pointers[k][m][0] = k; + back_pointers[k][m][1] = m - 1; + } + } + } + +# ifdef DEBUG +/* cout << "\n"; + for (k = length[i];k >= 0;k--) { + for (m = 0;m <= length[j];m++) { + cout << cumul_distance[k][m] << " "; + } + cout << endl; + } + cout << endl; */ +# endif + + alignment->length[alignment_index] = path_length[length[i]][length[j]]; + if (!alignment_path.empty()) { + alignment->int_sequence[alignment_index][0] = new int[alignment->length[alignment_index]]; + } + + // backtracking + + deletion_distance = 0.; + insertion_distance = 0.; + + palignment = alignment->int_sequence[alignment_index][0] + alignment->length[alignment_index]; + pref_position = length[i]; + ptest_position = length[j]; + +# ifdef DEBUG +// cout << pref_position << " " << ptest_position << endl; +# endif + + for (k = path_length[length[i]][length[j]];k > 0;k--) { + ref_position = pref_position; + test_position = ptest_position; + pref_position = back_pointers[ref_position][test_position][0]; + ptest_position = back_pointers[ref_position][test_position][1]; + +# ifdef DEBUG +// cout << pref_position << " " << ptest_position << endl; +# endif + + if (test_position == ptest_position) { + if (((test_position > 0) || (!begin_free)) && ((test_position < length[j]) || (!end_free))) { + *--palignment = DELETION; + deletion_distance += INDEL_DISTANCE; + } + else { + *--palignment = BEGIN_END_DELETION; + } + } + + else if (ref_position == pref_position) { + if (((ref_position > 0) || (!begin_free)) && ((ref_position < length[i]) || (!end_free))) { + *--palignment = INSERTION; + insertion_distance += INDEL_DISTANCE; + } + else { + *--palignment = BEGIN_END_INSERTION; + } + } + + else if ((ref_position == pref_position + 1) && (test_position == ptest_position + 1)) { + *--palignment = MATCH; + } + } + + // search for the maximum number of successive insertions/deletions + + palignment = alignment->int_sequence[alignment_index][0]; + max_gap_length = 0; + gap_length = 0; + + if ((*palignment == 
DELETION) || (*palignment == INSERTION)) { + gap_length++; + } + + for (k = 1;k < alignment->length[alignment_index];k++) { + if (*(palignment + 1) != *palignment) { + if (((*palignment == DELETION) || (*palignment == INSERTION)) && + (gap_length > max_gap_length)) { + max_gap_length = gap_length; + } + gap_length = 0; + } + + palignment++; + if (((*palignment == DELETION) || (*palignment == INSERTION)) && + ((gap_length == 0) || (*palignment == *(palignment - 1)))) { + gap_length++; + } + } + + if (((*palignment == DELETION) || (*palignment == INSERTION)) && + (gap_length > max_gap_length)) { + max_gap_length = gap_length; + } + + // update of the numbers of deletions, insertions and matchs + + palignment = alignment->int_sequence[alignment_index][0]; + + nb_deletion = 0; + nb_insertion = 0; + nb_match = 0; + nb_begin_end = 0; + + for (k = 0;k < alignment->length[alignment_index];k++) { + switch (*palignment++) { + case DELETION : + nb_deletion++; + break; + case INSERTION : + nb_insertion++; + break; + case MATCH : + nb_match++; + break; + default : + nb_begin_end++; + break; + } + } + + dist_matrix->update(identifier[i] , identifier[j] , cumul_distance[length[i]][length[j]] , + length[i] + length[j] - nb_begin_end , deletion_distance , nb_deletion , + insertion_distance , nb_insertion , nb_match); + + if (half_matrix) { + dist_matrix->update(identifier[j] , identifier[i] , cumul_distance[length[i]][length[j]] , + length[i] + length[j] - nb_begin_end , insertion_distance , nb_insertion , + deletion_distance , nb_deletion , nb_match); + } + +# ifdef DEBUG + { + double sum = deletion_distance + insertion_distance; + if ((sum < cumul_distance[length[i]][length[j]] - DOUBLE_ERROR) || + (sum > cumul_distance[length[i]][length[j]] + DOUBLE_ERROR)) { + cout << "\nERROR: " << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << " aligned on " + << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " << cumul_distance[length[i]][length[j]] + << " | " << sum << endl; + } + } +# endif + 
+ // writing of the alignment + + if ((os) && (nb_alignment <= DISPLAY_NB_ALIGNMENT)) { + alignment_ascii_print(*os , width , i , j , *alignment , alignment_index); + + if (length[i] + length[j] - nb_begin_end > 0) { + *os << STAT_label[STATL_DISTANCE] << " (" << SEQ_label[SEQL_ALIGNMENT_LENGTH] + << "): " << cumul_distance[length[i]][length[j]] / (length[i] + length[j] - nb_begin_end) + << " (" << path_length[length[i]][length[j]] - nb_begin_end << ") = " + << deletion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_deletion << " d) + " + << insertion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_insertion << " i) + 0 (" + << nb_match << " m)" << endl; + + *os << SEQ_label[SEQL_MAX_GAP_LENGTH] << ": " << max_gap_length << endl; + } + } + + if ((out_file) && (nb_alignment <= FILE_NB_ALIGNMENT)) { + switch (result_format) { + + case ASCII : { + alignment_ascii_print(*out_file , width , i , j , *alignment , alignment_index); + + if (length[i] + length[j] - nb_begin_end > 0) { + *out_file << STAT_label[STATL_DISTANCE] << " (" << SEQ_label[SEQL_ALIGNMENT_LENGTH] + << "): " << cumul_distance[length[i]][length[j]] / (length[i] + length[j] - nb_begin_end) + << " (" << path_length[length[i]][length[j]] - nb_begin_end << ") = " + << deletion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_deletion << " d) + " + << insertion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_insertion << " i) + 0 (" + << nb_match << " m)" << endl; + + *out_file << SEQ_label[SEQL_MAX_GAP_LENGTH] << ": " << max_gap_length << endl; + } + break; + } + + case SPREADSHEET : { + alignment_spreadsheet_print(*out_file , i , j , *alignment , alignment_index); + + if (length[i] + length[j] - nb_begin_end > 0) { + *out_file << STAT_label[STATL_DISTANCE] << " (" << SEQ_label[SEQL_ALIGNMENT_LENGTH] + << ")\t" << cumul_distance[length[i]][length[j]] / (length[i] + length[j] - nb_begin_end) + << " (" << path_length[length[i]][length[j]] - 
nb_begin_end << ")\t" + << deletion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_deletion << " d)\t" + << insertion_distance / (length[i] + length[j] - nb_begin_end) << " (" << nb_insertion << " i)\t0 (" + << nb_match << " m)" << endl; + + *out_file << SEQ_label[SEQL_MAX_GAP_LENGTH] << "\t" << max_gap_length << endl; + } + break; + } + } + } + + if (!alignment_path.empty()) { + alignment_index++; + } + } + } + } + } + + if ((ref_identifier == I_DEFAULT) || (test_identifier == I_DEFAULT)) { + + // writing of distances, alignment lengths, and numbers of deletions, insertions and matchs + + if ((os) && (nb_alignment <= DISPLAY_NB_ALIGNMENT)) { + *os << "\n"; + dist_matrix->ascii_write(*os); + } + + if (out_file) { + *out_file << "\n"; + + switch (result_format) { + case ASCII : + dist_matrix->ascii_write(*out_file); + break; + case SPREADSHEET : + dist_matrix->spreadsheet_write(*out_file); + break; + } + } + } + + if (!alignment_path.empty()) { + + // grouping of insertions/deletions and removing of insertions/deletions corresponding + // to begin of begin-free alignment or end of end-free alignment + + category = new int[MATCH + 1]; + category[DELETION] = 0; + category[INSERTION] = 0; + category[MATCH] = 1; + + if (os) { + *os << "\n" << SEQ_label[SEQL_ALIGNMENT_CODING] << "\n" << STAT_label[STATL_INDEL] << ": " << 0 + << "\n" << STAT_label[STATL_MATCH] << ": " << 1 << endl; + } + + for (i = 0;i < alignment->nb_sequence;i++) { + calignment = alignment->int_sequence[i][0]; + offset = 0; + while ((*calignment == BEGIN_END_DELETION) || (*calignment == BEGIN_END_INSERTION)) { + offset++; + calignment++; + } + palignment = alignment->int_sequence[i][0]; + for (j = offset;j < alignment->length[i];j++) { + if ((*calignment == BEGIN_END_DELETION) || (*calignment == BEGIN_END_INSERTION)) { + break; + } + *palignment++ = category[*calignment++]; + } + alignment->length[i] = j - offset; + } + + delete [] category; + + alignment->min_value_computation(0); + 
alignment->max_value_computation(0); + alignment->build_marginal_frequency_distribution(0); + + alignment->max_length_computation(); + alignment->cumul_length_computation(); + alignment->build_length_frequency_distribution(); + + // writing of alignment sequences + + status = alignment->ascii_data_write(error , alignment_path); + if ((!status) && (os)) { + *os << error; + } + } + + if (out_file) { + out_file->close(); + delete out_file; + } + + delete alignment; + + for (i = 0;i <= max_length;i++) { + delete [] cumul_distance[i]; + } + delete [] cumul_distance; + + for (i = 0;i <= max_length;i++) { + delete [] path_length[i]; + } + delete [] path_length; + + for (i = 0;i <= max_length;i++) { + for (j = 0;j <= max_length;j++) { + delete [] back_pointers[i][j]; + } + delete [] back_pointers[i]; + } + delete [] back_pointers; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a multiple alignment of sequences. + * + * \param[in,out] os stream. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::multiple_alignment_ascii_print(ostream &os) const + +{ + int i , j , k , m; + int var , width , rank; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + width = 0; + for (i = 0;i < nb_variable - 1;i++) { + var = column_width((int)min_value[i] , (int)max_value[i]); + if (var > width) { + width = var; + } + } + + rank = 0; + for (i = 0;i < max_length;i++) { + if ((int_sequence[0][nb_variable - 1][i] != GAP) && + (int_sequence[0][nb_variable - 1][i] != BEGIN_END_GAP)) { + os << setw(width) << int_sequence[0][0][i]; + } + else if (int_sequence[0][nb_variable - 1][i] == GAP) { + os << setw(width) << "-"; + } + else { + os << setw(width) << " "; + } + os << " "; + + if (((i - rank) * (width + 1) > LINE_NB_CHARACTER) || (i == max_length - 1)) { + if (i < max_length - 1) { + os << "\\"; + } + else { + os << " (" << identifier[0] << ")"; + } + os << endl; + + for (j = 1;j < nb_variable - 1;j++) { + for (k = rank;k <= i;k++) { + if ((int_sequence[0][nb_variable - 1][k] != GAP) && + (int_sequence[0][nb_variable - 1][k] != BEGIN_END_GAP)) { + os << setw(width) << int_sequence[0][j][k]; + } + else if (int_sequence[0][nb_variable - 1][k] == GAP) { + os << setw(width) << "-"; + } + else { + os << setw(width) << " "; + } + os << " "; + } + + if (i < max_length - 1) { + os << "\\"; + } + os << endl; + } + os << endl; + + for (j = 1;j < nb_sequence;j++) { + for (k = 0;k < nb_variable - 1;k++) { + for (m = rank;m <= i;m++) { + if ((int_sequence[j][nb_variable - 1][m] != GAP) && + (int_sequence[j][nb_variable - 1][m] != BEGIN_END_GAP)) { + os << setw(width) << int_sequence[j][k][m]; + } + else if (int_sequence[j][nb_variable - 1][m] == GAP) { + os << setw(width) << "-"; + } + else { + os << setw(width) << " "; + } + os << " "; + } + + if (i < max_length - 1) { + os << "\\"; + } + else if (k == 0) { + os << " (" << identifier[j] << ")"; + } + 
os << endl; + } + os << endl; + } + os << endl; + + // writing of the consensus sequence + + for (j = 0;j < nb_variable - 1;j++) { + for (k = rank;k <= i;k++) { + for (m = 0;m < nb_sequence;m++) { + if ((int_sequence[m][nb_variable - 1][k] == GAP) || + (int_sequence[m][nb_variable - 1][k] == BEGIN_END_GAP) || + (int_sequence[m][j][k] != int_sequence[0][j][k])) { + break; + } + } + + if (m == nb_sequence) { + os << setw(width) << int_sequence[0][j][k]; + } + else { + os << setw(width) << "."; + } + os << " "; + } + + if (i < max_length - 1) { + os << "\\"; + } + else if (j == 0) { + os << " " << SEQ_label[SEQL_CONSENSUS]; + } + os << endl; + } + os << "\n" << endl; + + if (i < max_length - 1) { + rank = i + 1; + } + } + } + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a multiple alignment of sequences in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::multiple_alignment_ascii_print(StatError &error , const string path) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + multiple_alignment_ascii_print(out_file); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Multiple alignment of sequences. 
+ * + * \param[in] test_seq reference on the test sequences + * \param[in] vector_dist reference on a VectorDistance object, + * \param[in] rank ranks (for ordinal variables), + * \param[in] max_category_distance maximum distances between categories, + * \param[in] begin_free flag begin-free alignment, + * \param[in] end_free flag end-free alignment, + * \param[in] indel_cost insertion/deletion costs adaptative or fixed, + * \param[in] indel_factor factor for deducing the insertion/deletion costs. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::multiple_alignment(const Sequences &test_seq , const VectorDistance &vector_dist , + double **rank , double **max_category_distance , bool begin_free , + bool end_free , insertion_deletion_cost indel_cost , double indel_factor) const + +{ + int i , j , k , m; + int ref_position , pref_position , test_position , ptest_position , *alignment , *palignment , + *ilength , **path_length , ***back_pointers; + double buff , sum , **ref_local_indel_distance , **test_local_indel_distance , **cumul_distance; + Sequences *seq; + + + // construction of the algorithm data structures - computation of the insertion/deletion costs + + if (indel_cost == FIXED) { + buff = indel_distance_computation(vector_dist , rank , max_category_distance) * indel_factor; + } + + ref_local_indel_distance = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + ref_local_indel_distance[i] = new double[max_length + 1]; + + switch (indel_cost) { + + case ADAPTATIVE : { + for (j = 1;j <= max_length;j++) { + if (int_sequence[i][nb_variable - 1][j - 1] == DATA) { + ref_local_indel_distance[i][j] = indel_distance_computation(vector_dist , i , j - 1 , rank , max_category_distance) * indel_factor; + } + else { + ref_local_indel_distance[i][j] = 0.; + } + } + break; + } + + case FIXED : { + for (j = 1;j <= max_length;j++) { + if (int_sequence[i][nb_variable - 1][j - 1] == 
DATA) { + ref_local_indel_distance[i][j] = buff; + } + else { + ref_local_indel_distance[i][j] = 0.; + } + } + break; + } + } + } + + test_local_indel_distance = new double*[test_seq.nb_sequence]; + for (i = 0;i < test_seq.nb_sequence;i++) { + test_local_indel_distance[i] = new double[test_seq.max_length + 1]; + + // computation of the insertion/deletion costs + + switch (indel_cost) { + + case ADAPTATIVE : { + for (j = 1;j <= test_seq.max_length;j++) { + if (test_seq.int_sequence[i][nb_variable - 1][j - 1] == DATA) { + test_local_indel_distance[i][j] = test_seq.indel_distance_computation(vector_dist , i , j - 1 , rank , max_category_distance) * indel_factor; + } + else { + test_local_indel_distance[i][j] = 0.; + } + } + break; + } + + case FIXED : { + for (j = 1;j <= test_seq.max_length;j++) { + if (test_seq.int_sequence[i][nb_variable - 1][j - 1] == DATA) { + test_local_indel_distance[i][j] = buff; + } + else { + test_local_indel_distance[i][j] = 0.; + } + } + break; + } + } + } + + cumul_distance = new double*[max_length + 1]; + for (i = 0;i <= max_length;i++) { + cumul_distance[i] = new double[test_seq.max_length + 1]; + } + + path_length = new int*[max_length + 1]; + for (i = 0;i <= max_length;i++) { + path_length[i] = new int[test_seq.max_length + 1]; + } + + back_pointers = new int**[max_length + 1]; + for (i = 0;i <= max_length;i++) { + back_pointers[i] = new int*[test_seq.max_length + 1]; + for (j = 0;j <= test_seq.max_length;j++) { + back_pointers[i][j] = new int[2]; + } + } + + alignment = new int[max_length + test_seq.max_length]; + + // initialization of the cumulative distances and the corresponding alignment lengths + + cumul_distance[0][0] = 0.; + path_length[0][0] = 0; + + for (i = 1;i <= max_length;i++) { + + // deletion + + if (begin_free) { + cumul_distance[i][0] = cumul_distance[i - 1][0]; + } + + else { + sum = 0.; + for (j = 0;j < nb_sequence;j++) { + if (int_sequence[j][nb_variable - 1][i - 1] == DATA) { + sum += 
ref_local_indel_distance[j][i]; + } + } + cumul_distance[i][0] = cumul_distance[i - 1][0] + sum / nb_sequence; + } + + path_length[i][0] = i; + back_pointers[i][0][0] = i - 1; + back_pointers[i][0][1] = 0; + } + + for (i = 1;i <= test_seq.max_length;i++) { + + // insertion + + if (begin_free) { + cumul_distance[0][i] = cumul_distance[0][i - 1]; + } + + else { + sum = 0.; + for (j = 0;j < test_seq.nb_sequence;j++) { + if (test_seq.int_sequence[j][nb_variable - 1][i - 1] == DATA) { + sum += test_local_indel_distance[j][i]; + } + } + cumul_distance[0][i] = cumul_distance[0][i - 1] + sum / test_seq.nb_sequence; + } + + path_length[0][i] = i; + back_pointers[0][i][0] = 0; + back_pointers[0][i][1] = i - 1; + } + + // computation of the cumulative distances and the corresponding alignment lengths + + for (i = 1;i <= max_length;i++) { + for (j = 1;j <= test_seq.max_length;j++) { + + sum = 0.; + for (k = 0;k < nb_sequence;k++) { + for (m = 0;m < test_seq.nb_sequence;m++) { + + // computation of the distance of substitution of one vector by another + + if ((int_sequence[k][nb_variable - 1][i - 1] == DATA) && (test_seq.int_sequence[m][nb_variable - 1][j - 1] == GAP)) { + sum += ref_local_indel_distance[k][i]; + } + else if ((int_sequence[k][nb_variable - 1][i - 1] == GAP) && (test_seq.int_sequence[m][nb_variable - 1][j - 1] == DATA)) { + sum += test_local_indel_distance[m][j]; + } + else if ((int_sequence[k][nb_variable - 1][i - 1] == DATA) && (test_seq.int_sequence[m][nb_variable - 1][j - 1] == DATA)) { + sum += substitution_distance_computation(vector_dist , k , m , i - 1 , j - 1 , rank , &test_seq); + } + } + } + + // match/substitution + + cumul_distance[i][j] = cumul_distance[i - 1][j - 1] + sum / (nb_sequence * test_seq.nb_sequence); + path_length[i][j] = path_length[i - 1][j - 1] + 1; + back_pointers[i][j][0] = i - 1; + back_pointers[i][j][1] = j - 1; + + // deletion + + if ((j < test_seq.max_length) || (!end_free)) { + sum = 0.; + for (k = 0;k < nb_sequence;k++) { + 
if (int_sequence[k][nb_variable - 1][i - 1] == DATA) { + sum += ref_local_indel_distance[k][i]; + } + } + buff = cumul_distance[i - 1][j] + sum / nb_sequence; + } + + else { + buff = cumul_distance[i - 1][j]; + } + + if (buff < cumul_distance[i][j]) { + cumul_distance[i][j] = buff; + path_length[i][j] = path_length[i - 1][j] + 1; + back_pointers[i][j][0] = i - 1; + back_pointers[i][j][1] = j; + } + + // insertion + + if ((i < max_length) || (!end_free)) { + sum = 0.; + for (k = 0;k < test_seq.nb_sequence;k++) { + if (test_seq.int_sequence[k][nb_variable - 1][j - 1] == DATA) { + sum += test_local_indel_distance[k][j]; + } + } + buff = cumul_distance[i][j - 1] + sum / test_seq.nb_sequence; + } + + else { + buff = cumul_distance[i][j - 1]; + } + + if (buff < cumul_distance[i][j]) { + cumul_distance[i][j] = buff; + path_length[i][j] = path_length[i][j - 1] + 1; + back_pointers[i][j][0] = i; + back_pointers[i][j][1] = j - 1; + } + } + } + +# ifdef DEBUG + cout << "\nMultiple alignment distance: " << cumul_distance[max_length][test_seq.max_length] << endl; +# endif + + // backtracking + + palignment = alignment + path_length[max_length][test_seq.max_length]; + pref_position = max_length; + ptest_position = test_seq.max_length; + +# ifdef DEBUG +// cout << pref_position << " " << ptest_position << endl; +# endif + + for (i = path_length[max_length][test_seq.max_length];i > 0;i--) { + ref_position = pref_position; + test_position = ptest_position; + pref_position = back_pointers[ref_position][test_position][0]; + ptest_position = back_pointers[ref_position][test_position][1]; + +# ifdef DEBUG +// cout << pref_position << " " << ptest_position << endl; +# endif + + if (test_position == ptest_position) { + if (((test_position > 0) || (!begin_free)) && ((test_position < test_seq.max_length) || (!end_free))) { + *--palignment = DELETION; + } + else { + *--palignment = BEGIN_END_DELETION; + } + } + + else if (ref_position == pref_position) { + if (((ref_position > 0) || 
(!begin_free)) && ((ref_position < max_length) || (!end_free))) { + *--palignment = INSERTION; + } + else { + *--palignment = BEGIN_END_INSERTION; + } + } + + else if ((ref_position == pref_position + 1) && (test_position == ptest_position + 1)) { + *--palignment = SUBSTITUTION; + } + } + + // construction of the group of sequences + + ilength = new int[nb_sequence + test_seq.nb_sequence]; + for (i = 0;i < nb_sequence + test_seq.nb_sequence;i++) { + ilength[i] = path_length[max_length][test_seq.max_length]; + } + seq = new Sequences(nb_sequence + test_seq.nb_sequence , NULL , ilength , + nb_variable , type); + delete [] ilength; + + for (i = 0;i < nb_variable;i++) { + seq->min_value[i] = min_value[i]; + seq->max_value[i] = max_value[i]; + } + + for (i = 0;i < nb_sequence;i++) { + seq->identifier[i] = identifier[i]; + } + for (i = 0;i < test_seq.nb_sequence;i++) { + seq->identifier[nb_sequence + i] = test_seq.identifier[i]; + } + + palignment = alignment; + ref_position = 0; + test_position = 0; + + for (i = 0;i < path_length[max_length][test_seq.max_length];i++) { + if ((*palignment != INSERTION) && (*palignment != BEGIN_END_INSERTION)) { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < nb_variable;k++) { + seq->int_sequence[j][k][i] = int_sequence[j][k][ref_position]; + } + } + ref_position++; + } + + else { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < nb_variable - 1;k++) { + seq->int_sequence[j][k][i] = (int)max_value[k] + 1; + } + + switch (*palignment) { + + case INSERTION : { + if (((i > 0) && (int_sequence[j][nb_variable - 1][ref_position - 1] == BEGIN_END_GAP)) || + ((i < path_length[max_length][test_seq.max_length] - 1) && (int_sequence[j][nb_variable - 1][ref_position + 1] == BEGIN_END_GAP))) { + seq->int_sequence[j][nb_variable - 1][i] = BEGIN_END_GAP; + } + else { + seq->int_sequence[j][nb_variable - 1][i] = GAP; + } + break; + } + + case BEGIN_END_INSERTION : { + seq->int_sequence[j][nb_variable - 1][i] = BEGIN_END_GAP; + break; + } + } + } 
+ } + + if ((*palignment != DELETION) && (*palignment != BEGIN_END_DELETION)) { + for (j = 0;j < test_seq.nb_sequence;j++) { + for (k = 0;k < nb_variable;k++) { + seq->int_sequence[j + nb_sequence][k][i] = test_seq.int_sequence[j][k][test_position]; + } + } + test_position++; + } + + else { + for (j = 0;j < test_seq.nb_sequence;j++) { + for (k = 0;k < nb_variable - 1;k++) { + seq->int_sequence[j + nb_sequence][k][i] = (int)max_value[k] + 1; + } + + switch (*palignment) { + + case DELETION : { + if (((i > 0) && (test_seq.int_sequence[j][nb_variable - 1][test_position - 1] == BEGIN_END_GAP)) || + ((i < path_length[max_length][test_seq.max_length] - 1) && (test_seq.int_sequence[j][nb_variable - 1][test_position + 1] == BEGIN_END_GAP))) { + seq->int_sequence[j + nb_sequence][nb_variable - 1][i] = BEGIN_END_GAP; + } + else { + seq->int_sequence[j + nb_sequence][nb_variable - 1][i] = GAP; + } + break; + } + + case BEGIN_END_DELETION : { + seq->int_sequence[j + nb_sequence][nb_variable - 1][i] = BEGIN_END_GAP; + break; + } + } + } + } + + palignment++; + } + + for (i = 0;i < nb_sequence;i++) { + delete [] ref_local_indel_distance[i]; + } + delete [] ref_local_indel_distance; + + for (i = 0;i < test_seq.nb_sequence;i++) { + delete [] test_local_indel_distance[i]; + } + delete [] test_local_indel_distance; + + for (i = 0;i <= max_length;i++) { + delete [] cumul_distance[i]; + } + delete [] cumul_distance; + + for (i = 0;i <= max_length;i++) { + delete [] path_length[i]; + } + delete [] path_length; + + for (i = 0;i <= max_length;i++) { + for (j = 0;j <= test_seq.max_length;j++) { + delete [] back_pointers[i][j]; + } + delete [] back_pointers[i]; + } + delete [] back_pointers; + + delete [] alignment; + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Multiple alignment of sequences. 
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] os           stream for displaying the alignment,
+ *  \param[in] ivector_dist reference on a VectorDistance object,
+ *  \param[in] begin_free   flag begin-free alignment,
+ *  \param[in] end_free     flag end-free alignment,
+ *  \param[in] indel_cost   insertion/deletion costs adaptive or fixed,
+ *  \param[in] indel_factor factor for deducing the insertion/deletion costs,
+ *  \param[in] strategy     type of algorithm for the dendrogram construction,
+ *  \param[in] path         file path.
+ *
+ *  \return                 Sequences object (NULL if a check fails or if the pairwise
+ *                          alignment does not return a distance matrix).
+ */
+/*--------------------------------------------------------------*/
+
+Sequences* Sequences::multiple_alignment(StatError &error , ostream *os ,
+                                         const VectorDistance &ivector_dist ,
+                                         bool begin_free , bool end_free ,
+                                         insertion_deletion_cost indel_cost , double indel_factor ,
+                                         hierarchical_strategy strategy ,
+                                         const string path) const
+
+{
+  bool status = true;
+  int i , j , k;
+  int *itype , *psequence , *csequence , *variable;
+  double **rank , **max_category_distance;
+  VectorDistance *vector_dist;
+  DistanceMatrix *dist_matrix;
+  Dendrogram *dendrogram;
+  Sequences *seq , **clustered_seq;
+
+
+  seq = NULL;
+  error.init();
+
+  // sequences with an index parameter are rejected
+
+  if (index_parameter) {
+    status = false;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  }
+
+  // every variable must be of type INT_VALUE or STATE
+
+  for (i = 0;i < nb_variable;i++) {
+    if ((type[i] != INT_VALUE) && (type[i] != STATE)) {
+      status = false;
+      ostringstream error_message , correction_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      correction_message << STAT_variable_word[INT_VALUE] << " or "
+                         << STAT_variable_word[STATE];
+      error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str());
+    }
+  }
+
+  if (status) {
+
+    // pairwise alignment of sequences
+
+    dist_matrix = alignment(error , NULL , ivector_dist , I_DEFAULT , I_DEFAULT ,
+                            begin_free , end_free , indel_cost , indel_factor);
+
+    if (dist_matrix) {
+
+      // construction of a dendrogram on the basis of the matrix of pairwise alignment of sequences
+
+      if (strategy != DIVISIVE) {
+        dendrogram = dist_matrix->agglomerative_hierarchical_clustering(strategy);
+      }
+      else {
+        dendrogram = dist_matrix->divisive_hierarchical_clustering();
+      }
+
+      if (os) {
+        *os << *dendrogram << "\n";
+      }
+
+      // private copy of the distance definition: its dispersions are updated below
+
+      vector_dist = new VectorDistance(ivector_dist);
+
+      // computation of the maximum substitution distance for nominal variables and
+      // the ranks for ordinal variables
+
+      rank = new double*[nb_variable];
+      max_category_distance = new double*[nb_variable];
+
+      for (i = 0;i < nb_variable;i++) {
+        if ((vector_dist->get_var_type(i) == NOMINAL) && (vector_dist->get_category_distance(i))) {
+          max_category_distance[i] = vector_dist->max_category_distance_computation(i);
+        }
+        else {
+          max_category_distance[i] = 0;
+        }
+
+        // NOTE(review): marginal_distribution[i] is dereferenced here without a NULL test,
+        // although the next statement handles a NULL marginal distribution - confirm that
+        // an ordinal variable always has a marginal distribution
+
+        if (vector_dist->get_var_type(i) == ORDINAL) {
+          rank[i] = marginal_distribution[i]->rank_computation();
+        }
+        else {
+          rank[i] = NULL;
+        }
+
+        // computation of the dispersion measures for standardization
+
+        if (marginal_distribution[i]) {
+          vector_dist->dispersion_computation(i , marginal_distribution[i] , rank[i]);
+        }
+
+        else {
+          switch (vector_dist->get_distance_type()) {
+          case ABSOLUTE_VALUE :
+            vector_dist->dispersion_update(i , mean_absolute_difference_computation(i));
+            break;
+          case QUADRATIC :
+            vector_dist->dispersion_update(i , 2 * variance_computation(i , mean_computation(i)));
+            break;
+          }
+
+          // a null dispersion is replaced by 1.
+
+          if (vector_dist->get_dispersion(i) == 0.) {
+            vector_dist->dispersion_update(i , 1.);
+          }
+        }
+      }
+
+      // construction of the initial groups: one group per sequence, with an additional
+      // INT_VALUE variable (index nb_variable) set to DATA at every position
+
+      itype = new int[nb_variable + 1];
+      for (i = 0;i < nb_variable;i++) {
+        itype[i] = type[i];
+      }
+      itype[nb_variable] = INT_VALUE;
+
+      // 2 * nb_sequence - 1 groups: nb_sequence initial groups followed by
+      // nb_sequence - 1 merged groups
+
+      clustered_seq = new Sequences*[2 * nb_sequence - 1];
+      for (i = 0;i < nb_sequence;i++) {
+        clustered_seq[i] = new Sequences(1 , &identifier[i] , &length[i] ,
+                                         nb_variable + 1 , itype);
+
+        for (j = 0;j < nb_variable;j++) {
+          clustered_seq[i]->min_value[j] = min_value[j];
+          clustered_seq[i]->max_value[j] = max_value[j];
+
+          psequence = clustered_seq[i]->int_sequence[0][j];
+          csequence = int_sequence[i][j];
+          for (k = 0;k < length[i];k++) {
+            *psequence++ = *csequence++;
+          }
+        }
+
+        psequence = clustered_seq[i]->int_sequence[0][nb_variable];
+        for (j = 0;j < length[i];j++) {
+          *psequence++ = DATA;
+        }
+      }
+
+      // multiple alignment of sequences: each merged group is the alignment of
+      // the two children given by the dendrogram
+
+      for (i = nb_sequence;i < 2 * nb_sequence - 1;i++) {
+        clustered_seq[i] = clustered_seq[dendrogram->get_child(i , 0)]->multiple_alignment(*(clustered_seq[dendrogram->get_child(i , 1)]) ,
+                                                                                           *vector_dist , rank ,
+                                                                                           max_category_distance , begin_free ,
+                                                                                           end_free , indel_cost , indel_factor);
+
+#       ifdef DEBUG
+        if (i < 2 * nb_sequence - 2) {
+          clustered_seq[i]->multiple_alignment_ascii_print(cout);
+        }
+#       endif
+
+      }
+
+      // writing of the multiple alignment (last group = all the sequences)
+
+      if (os) {
+        clustered_seq[2 * nb_sequence - 2]->multiple_alignment_ascii_print(*os);
+      }
+
+      // a failure of the file output is only reported on os: seq is still built
+
+      if (!path.empty()) {
+        status = clustered_seq[2 * nb_sequence - 2]->multiple_alignment_ascii_print(error , path);
+        if ((!status) && (os)) {
+          *os << error;
+        }
+      }
+
+      // extraction of the first nb_variable variables of the last group
+
+      seq = new Sequences(nb_sequence , clustered_seq[2 * nb_sequence - 2]->identifier ,
+                          clustered_seq[2 * nb_sequence - 2]->length , nb_variable , itype);
+
+      variable = new int[nb_variable];
+      for (i = 0;i < nb_variable;i++) {
+        variable[i] = i;
+      }
+      seq->select_variable(*(clustered_seq[2 * nb_sequence - 2]) , variable);
+
+      // presumably accounts for the value max_value + 1 written at gap positions by
+      // the group alignment - TODO confirm
+
+      for (i = 0;i < nb_variable;i++) {
+        (seq->max_value[i])++;
+        seq->build_marginal_frequency_distribution(i);
+      }
+
+      delete [] variable;
+
+      for (i = 0;i < 2 * nb_sequence - 1;i++) {
+        delete clustered_seq[i];
+      }
+      delete [] clustered_seq;
+
+      delete [] itype;
+
+      delete dendrogram;
+
+      delete vector_dist;
+
+      for (i = 0;i < nb_variable;i++) {
+        delete [] rank[i];
+        delete [] max_category_distance[i];
+      }
+      delete [] rank;
+      delete [] max_category_distance;
+    }
+
+    delete dist_matrix;
+  }
+
+  return seq;
+}
+
+
+};  // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/categorical_sequence_process1.cpp b/src/cpp/sequence_analysis/categorical_sequence_process1.cpp
new file mode 100644
index 0000000..0ee41a0
--- /dev/null
+++ b/src/cpp/sequence_analysis/categorical_sequence_process1.cpp
@@ -0,0 +1,1018 @@
+/* -*-c++-*-
+ *  ----------------------------------------------------------------------------
+ *
+ *       StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ *       Copyright 1995-2018 CIRAD AGAP
+ *
+ *       File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ *       $Source$
+ *       $Id$
+ *
+ *       Forum for StructureAnalysis developers:
+ *
+ *  ----------------------------------------------------------------------------
+ *
+ *                      GNU General Public Licence
+ *
+ *       This program is free software; you can redistribute it and/or
+ *       modify it under the terms of the GNU General Public License as
+ *       published by the Free Software Foundation; either version 2 of
+ *       the License, or (at your option) any later version.
+ *
+ *       This program is distributed in the hope that it will be useful,
+ *       but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *       MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the
+ *       GNU General Public License for more details.
+ *
+ *       You should have received a copy of the GNU General Public
+ *       License along with this program; see the file COPYING. If not,
+ *       write to the Free Software Foundation, Inc., 59
+ *       Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the distributions of a CategoricalSequenceProcess object + * except the observation distributions. + * + * \param[in] ilength sequence length distribution, + * \param[in] homogeneity state homogeneity, + * \param[in] counting_flag flag counting distributions. + */ +/*--------------------------------------------------------------*/ + +void CategoricalSequenceProcess::create_characteristic(const Distribution &ilength , + bool *homogeneity , + bool counting_flag) + +{ + bool homogeneous = true; + int i; + int max_length = ilength.nb_value - 1; + + + if (length != NULL) { + delete length; + length = NULL; + } + length = new Distribution(ilength); + + if (index_value != NULL) { + delete index_value; + index_value = NULL; + } + index_value = new Curves(nb_value , max_length , false , false , false); + + if (no_occurrence != NULL) { + delete [] no_occurrence; + } + no_occurrence = new double[nb_value]; + + for (i = 0;i < nb_value;i++) { + no_occurrence[i] = 0.; + } + + if (first_occurrence != NULL) { + for (i = 0;i < nb_value;i++) { + delete first_occurrence[i]; + first_occurrence[i] = NULL; + } + delete [] first_occurrence; + first_occurrence = NULL; + } + first_occurrence = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + first_occurrence[i] = new Distribution(NB_VALUE); + } + + if (absorption != NULL) { + delete [] absorption; + absorption = NULL; + } + + absorption = new double[nb_value]; + + for (i = 0;i < nb_value;i++) { + absorption[i] = 0.; + } + + if (sojourn_time != NULL) { + for (i = 0;i < nb_value;i++) { + delete 
sojourn_time[i]; + sojourn_time[i] = NULL; + } + delete [] sojourn_time; + sojourn_time = NULL; + } + + sojourn_time = new DiscreteParametric*[nb_value]; + + for (i = 0;i < nb_value;i++) { + if (homogeneity[i]) { + sojourn_time[i] = new DiscreteParametric(NB_VALUE); + } + else { + sojourn_time[i] = NULL; + homogeneous = false; + } + } + + if (homogeneous) { + if (leave != NULL) { + delete [] leave; + leave = NULL; + } + leave = new double[nb_value]; + + for (i = 0;i < nb_value;i++) { + leave[i] = 0.; + } + + if (recurrence_time != NULL) { + for (i = 0;i < nb_value;i++) { + delete recurrence_time[i]; + recurrence_time[i] = NULL; + } + delete [] recurrence_time; + recurrence_time = NULL; + } + recurrence_time = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + recurrence_time[i] = new Distribution(NB_VALUE); + } + } + + if (counting_flag) { + if (nb_run != NULL) { + for (i = 0;i < nb_value;i++) { + delete nb_run[i]; + nb_run[i] = NULL; + } + delete [] nb_run; + } + nb_run = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + nb_run[i] = new Distribution((max_length % 2 == 0 ? + max_length / 2 : max_length / 2 + 1) + 1); + } + + if (nb_occurrence != NULL) { + for (i = 0;i < nb_value;i++) { + delete nb_occurrence[i]; + nb_occurrence[i] = NULL; + } + delete [] nb_occurrence; + nb_occurrence = NULL; + } + + nb_occurrence = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + nb_occurrence[i] = new Distribution(max_length + 1); + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the distributions of a CategoricalSequenceProcess object + * except the observation distributions. + * + * \param[in] ilength sequence length distribution, + * \param[in] sojourn_time_flag flag sojourn time distributions, + * \param[in] counting_flag flag counting distributions. 
+ */ +/*--------------------------------------------------------------*/ + +void CategoricalSequenceProcess::create_characteristic(const Distribution &ilength , + bool sojourn_time_flag , bool counting_flag) + +{ + int i; + int max_length = ilength.nb_value - 1; + + + if (length != NULL) { + delete length; + length = NULL; + } + length = new Distribution(ilength); + + if (index_value != NULL) { + delete index_value; + index_value = NULL; + } + index_value = new Curves(nb_value , max_length , false , false , false); + + if (no_occurrence != NULL) { + delete [] no_occurrence; + no_occurrence = NULL; + } + no_occurrence = new double[nb_value]; + + for (i = 0;i < nb_value;i++) { + no_occurrence[i] = 0.; + } + + if (first_occurrence != NULL) { + for (i = 0;i < nb_value;i++) { + delete first_occurrence[i]; + first_occurrence[i] = NULL; + } + delete [] first_occurrence; + first_occurrence = NULL; + } + first_occurrence = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + first_occurrence[i] = new Distribution(NB_VALUE); + } + + if (leave != NULL) { + delete [] leave; + leave = NULL; + } + + leave = new double[nb_value]; + + for (i = 0;i < nb_value;i++) { + leave[i] = 0.; + } + + if (recurrence_time != NULL) { + for (i = 0;i < nb_value;i++) { + delete recurrence_time[i]; + recurrence_time[i] = NULL; + } + delete [] recurrence_time; + recurrence_time = NULL; + } + recurrence_time = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + recurrence_time[i] = new Distribution(NB_VALUE); + } + + if (sojourn_time_flag) { + if (absorption != NULL) { + delete [] absorption; + absorption = NULL; + } + absorption = new double[nb_value]; + + for (i = 0;i < nb_value;i++) { + absorption[i] = 0.; + } + + if (sojourn_time != NULL) { + for (i = 0;i < nb_value;i++) { + delete sojourn_time[i]; + sojourn_time[i] = NULL; + } + delete [] sojourn_time; + sojourn_time = NULL; + } + sojourn_time = new DiscreteParametric*[nb_value]; + + for (i = 0;i < nb_value;i++) { + 
sojourn_time[i] = new DiscreteParametric(NB_VALUE); + } + } + + if (counting_flag) { + if (nb_run != NULL) { + for (i = 0;i < nb_value;i++) { + delete nb_run[i]; + nb_run[i] = NULL; + } + } + nb_run = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + nb_run[i] = new Distribution((max_length % 2 == 0 ? + max_length / 2 : max_length / 2 + 1) + 1); + } + + if (nb_occurrence != NULL) { + for (i = 0;i < nb_value;i++) { + delete nb_occurrence[i]; + nb_occurrence[i] = NULL; + } + delete [] nb_occurrence; + nb_occurrence = NULL; + } + + nb_occurrence = new Distribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + nb_occurrence[i] = new Distribution(max_length + 1); + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the CategoricalSequenceProcess class. + * + * \param[in] inb_state number of states, + * \param[in] inb_value number of categories, + * \param[in] observation_flag flag observation distributions. + */ +/*--------------------------------------------------------------*/ + +CategoricalSequenceProcess::CategoricalSequenceProcess(int inb_state , int inb_value , + int observation_flag) +:CategoricalProcess(inb_state , inb_value , observation_flag) + +{ + length = NULL; + index_value = NULL; + no_occurrence = NULL; + first_occurrence = NULL; + leave = NULL; + recurrence_time = NULL; + absorption = NULL; + sojourn_time = NULL; + nb_run = NULL; + nb_occurrence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the CategoricalSequenceProcess class. + * + * \param[in] inb_state number of states, + * \param[in] occupancy state occupancy distributions. 
+ */
+/*--------------------------------------------------------------*/
+
+CategoricalSequenceProcess::CategoricalSequenceProcess(int inb_state ,
+                                                       DiscreteParametric **occupancy)
+
+{
+  int i;
+
+
+  // the categories are the states themselves: no observation distribution
+
+  nb_state = inb_state;
+  nb_value = inb_state;
+
+  observation = NULL;
+
+  length = NULL;
+  index_value = NULL;
+  no_occurrence = NULL;
+  first_occurrence = NULL;
+  leave = NULL;
+  recurrence_time = NULL;
+
+  absorption = new double[nb_state];
+  sojourn_time = new DiscreteParametric*[nb_state];
+  for (i = 0;i < nb_state;i++) {
+
+    // absorption is read by copy() and init_occupancy(): it must not be left
+    // uninitialized
+
+    absorption[i] = 0.;
+
+    // deep copy of the given occupancy distributions; states without an
+    // occupancy distribution keep a NULL entry
+
+    if (occupancy[i]) {
+      sojourn_time[i] = new DiscreteParametric(*occupancy[i]);
+    }
+    else {
+      sojourn_time[i] = NULL;
+    }
+  }
+
+  nb_run = NULL;
+  nb_occurrence = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a CategoricalSequenceProcess object from
+ *         a CategoricalProcess object.
+ *
+ *  The CategoricalProcess part is copy-constructed; no characteristic
+ *  distribution is allocated.
+ *
+ *  \param[in] process reference on a CategoricalProcess object.
+ */
+/*--------------------------------------------------------------*/
+
+CategoricalSequenceProcess::CategoricalSequenceProcess(const CategoricalProcess &process)
+:CategoricalProcess(process)
+
+{
+  length = NULL;
+  index_value = NULL;
+  no_occurrence = NULL;
+  first_occurrence = NULL;
+  leave = NULL;
+  recurrence_time = NULL;
+  absorption = NULL;
+  sojourn_time = NULL;
+  nb_run = NULL;
+  nb_occurrence = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a CategoricalSequenceProcess object.
+ *
+ *  \param[in] process             reference on a CategoricalSequenceProcess object,
+ *  \param[in] characteristic_flag flag copy of the characteristic distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void CategoricalSequenceProcess::copy(const CategoricalSequenceProcess &process ,
+                                      bool characteristic_flag)
+
+{
+  int value;
+
+
+  // without the flag, the object carries no characteristic at all
+
+  if (!characteristic_flag) {
+    length = NULL;
+    index_value = NULL;
+    no_occurrence = NULL;
+    first_occurrence = NULL;
+    leave = NULL;
+    recurrence_time = NULL;
+    absorption = NULL;
+    sojourn_time = NULL;
+    nb_run = NULL;
+    nb_occurrence = NULL;
+    return;
+  }
+
+  // deep copy of every characteristic present in the source; absent ones are
+  // set to NULL. The arrays are sized with the nb_value of this object.
+
+  length = (process.length ? new Distribution(*(process.length)) : NULL);
+
+  index_value = (process.index_value ? new Curves(*(process.index_value)) : NULL);
+
+  if (process.no_occurrence == NULL) {
+    no_occurrence = NULL;
+  }
+  else {
+    no_occurrence = new double[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      no_occurrence[value] = process.no_occurrence[value];
+    }
+  }
+
+  if (process.first_occurrence == NULL) {
+    first_occurrence = NULL;
+  }
+  else {
+    first_occurrence = new Distribution*[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      first_occurrence[value] = new Distribution(*(process.first_occurrence[value]));
+    }
+  }
+
+  if (process.leave == NULL) {
+    leave = NULL;
+  }
+  else {
+    leave = new double[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      leave[value] = process.leave[value];
+    }
+  }
+
+  // the entries of recurrence_time may be NULL
+
+  if (process.recurrence_time == NULL) {
+    recurrence_time = NULL;
+  }
+  else {
+    recurrence_time = new Distribution*[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      recurrence_time[value] = (process.recurrence_time[value] ?
+                                new Distribution(*(process.recurrence_time[value])) : NULL);
+    }
+  }
+
+  if (process.absorption == NULL) {
+    absorption = NULL;
+  }
+  else {
+    absorption = new double[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      absorption[value] = process.absorption[value];
+    }
+  }
+
+  // the entries of sojourn_time may be NULL
+
+  if (process.sojourn_time == NULL) {
+    sojourn_time = NULL;
+  }
+  else {
+    sojourn_time = new DiscreteParametric*[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      sojourn_time[value] = (process.sojourn_time[value] ?
+                             new DiscreteParametric(*(process.sojourn_time[value])) : NULL);
+    }
+  }
+
+  if (process.nb_run == NULL) {
+    nb_run = NULL;
+  }
+  else {
+    nb_run = new Distribution*[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      nb_run[value] = new Distribution(*(process.nb_run[value]));
+    }
+  }
+
+  if (process.nb_occurrence == NULL) {
+    nb_occurrence = NULL;
+  }
+  else {
+    nb_occurrence = new Distribution*[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      nb_occurrence[value] = new Distribution(*(process.nb_occurrence[value]));
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of the state occupancy distributions.
+ *
+ *  Only nb_state, nb_value, absorption and the non-categorical sojourn time
+ *  distributions are taken from the source; every other member is set to NULL.
+ *  process.absorption and process.sojourn_time are read without a NULL test.
+ *
+ *  \param[in] process            reference on a CategoricalSequenceProcess object,
+ *  \param[in] occupancy_nb_value number of allocated values for the state occupancy distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void CategoricalSequenceProcess::init_occupancy(const CategoricalSequenceProcess &process ,
+                                                int occupancy_nb_value)
+
+{
+  int state;
+
+
+  nb_state = process.nb_state;
+  nb_value = process.nb_value;
+
+  observation = NULL;
+
+  length = NULL;
+  index_value = NULL;
+  no_occurrence = NULL;
+  first_occurrence = NULL;
+  leave = NULL;
+  recurrence_time = NULL;
+  nb_run = NULL;
+  nb_occurrence = NULL;
+
+  absorption = new double[nb_value];
+  sojourn_time = new DiscreteParametric*[nb_value];
+
+  for (state = 0;state < nb_value;state++) {
+    absorption[state] = process.absorption[state];
+
+    // missing or categorical occupancy distributions are not copied
+
+    if ((!process.sojourn_time[state]) || (process.sojourn_time[state]->ident == CATEGORICAL)) {
+      sojourn_time[state] = NULL;
+    }
+    else {
+      sojourn_time[state] = new DiscreteParametric(*(process.sojourn_time[state]) ,
+                                                   DISTRIBUTION_COPY , occupancy_nb_value);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor by copy of the CategoricalSequenceProcess class.
+ *
+ *  \param[in] process   reference on a CategoricalSequenceProcess object,
+ *  \param[in] transform type of transform (CATEGORICAL_SEQUENCE_PROCESS_COPY/INIT_OCCUPANCY),
+ *  \param[in] param     flag on the computation of the characteristic distributions/
+ *                       number of allocated values for the state occupancy distributions.
+ */
+/*--------------------------------------------------------------*/
+
+CategoricalSequenceProcess::CategoricalSequenceProcess(const CategoricalSequenceProcess &process ,
+                                                       categorical_sequence_process_transformation transform ,
+                                                       int param)
+: length(NULL),
+  index_value(NULL),
+  no_occurrence(NULL),
+  first_occurrence(NULL),
+  leave(NULL),
+  recurrence_time(NULL),
+  absorption(NULL),
+  sojourn_time(NULL),
+  nb_run(NULL),
+  nb_occurrence(NULL)
+
+{
+  // any other transform leaves the object with the NULL members set above
+
+  if (transform == CATEGORICAL_SEQUENCE_PROCESS_COPY) {
+
+    // full copy: param is used as the characteristic flag
+
+    CategoricalProcess::copy(process);
+    copy(process , param);
+  }
+
+  else if (transform == INIT_OCCUPANCY) {
+
+    // occupancy only: param is the number of allocated values
+
+    init_occupancy(process , param);
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destruction of the data members of a CategoricalSequenceProcess object.
+ */
+/*--------------------------------------------------------------*/
+
+void CategoricalSequenceProcess::remove()
+
+{
+  int value;
+
+
+  // single objects and arrays of real values: deleting a NULL pointer is a no-op
+
+  delete length;
+  length = NULL;
+
+  delete index_value;
+  index_value = NULL;
+
+  delete [] no_occurrence;
+  no_occurrence = NULL;
+
+  delete [] leave;
+  leave = NULL;
+
+  delete [] absorption;
+  absorption = NULL;
+
+  // arrays of nb_value owned distributions: the elements, then the array
+
+  if (first_occurrence != NULL) {
+    for (value = 0;value < nb_value;value++) {
+      delete first_occurrence[value];
+      first_occurrence[value] = NULL;
+    }
+    delete [] first_occurrence;
+    first_occurrence = NULL;
+  }
+
+  if (recurrence_time != NULL) {
+    for (value = 0;value < nb_value;value++) {
+      delete recurrence_time[value];
+      recurrence_time[value] = NULL;
+    }
+    delete [] recurrence_time;
+    recurrence_time = NULL;
+  }
+
+  if (sojourn_time != NULL) {
+    for (value = 0;value < nb_value;value++) {
+      delete sojourn_time[value];
+      sojourn_time[value] = NULL;
+    }
+    delete [] sojourn_time;
+    sojourn_time = NULL;
+  }
+
+  if (nb_run != NULL) {
+    for (value = 0;value < nb_value;value++) {
+      delete nb_run[value];
+      nb_run[value] = NULL;
+    }
+    delete [] nb_run;
+    nb_run = NULL;
+  }
+
+  if (nb_occurrence != NULL) {
+    for (value = 0;value < nb_value;value++) {
+      delete nb_occurrence[value];
+      nb_occurrence[value] = NULL;
+    }
+    delete [] nb_occurrence;
+    nb_occurrence = NULL;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the CategoricalSequenceProcess class:
+ *         releases the characteristics through remove().
+ */
+/*--------------------------------------------------------------*/
+
+CategoricalSequenceProcess::~CategoricalSequenceProcess()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the CategoricalSequenceProcess class.
+ *
+ *  \param[in] process reference on a CategoricalSequenceProcess object.
+ *
+ *  \return            CategoricalSequenceProcess object.
+ */
+/*--------------------------------------------------------------*/
+
+CategoricalSequenceProcess& CategoricalSequenceProcess::operator=(const CategoricalSequenceProcess &process)
+
+{
+  // self-assignment: nothing to do
+
+  if (&process == this) {
+    return *this;
+  }
+
+  // release of the current content (derived part first), then copy of the
+  // base part followed by the characteristics
+
+  remove();
+  CategoricalProcess::remove();
+
+  CategoricalProcess::copy(process);
+  copy(process);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief test model hidden.
+ *
+ *  The model is hidden as soon as one observation process is missing (NULL)
+ *  or is reported as hidden by CategoricalProcess::test_hidden().
+ *
+ *  \param[in] nb_output_process number of observation processes,
+ *  \param[in] process           pointer on the observation processes.
+ *
+ *  \return                      model hidden or not.
+ */
+/*--------------------------------------------------------------*/
+
+bool CategoricalSequenceProcess::test_hidden(int nb_output_process , CategoricalSequenceProcess **process)
+
+{
+  int output;
+
+
+  for (output = 0;output < nb_output_process;output++) {
+    if (!process[output]) {
+      return true;
+    }
+    if (process[output]->CategoricalProcess::test_hidden()) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Analysis of the format of state occupancy distributions.
+ *
+ *  \param[in] error           reference on a StatError object,
+ *  \param[in] in_file         stream,
+ *  \param[in] line            reference on the file line index,
+ *  \param[in] chain           reference on a Chain object,
+ *  \param[in] cumul_threshold threshold on the cumulative distribution function.
+ *
+ *  \return                    CategoricalSequenceProcess object.
+ */ +/*--------------------------------------------------------------*/ + +CategoricalSequenceProcess* CategoricalSequenceProcess::occupancy_parsing(StatError &error , ifstream &in_file , + int &line , const Chain &chain , + double cumul_threshold) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + bool status = true , lstatus; + int i , j; + int index; + DiscreteParametric **dist; + CategoricalSequenceProcess *process; + + + process = NULL; + + dist = new DiscreteParametric*[chain.nb_state]; + for (i = 0;i < chain.nb_state;i++) { + dist[i] = NULL; + } + + for (i = 0;i < chain.nb_state;i++) { + if (chain.transition[i][i] == 0.) { + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + j = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (j) { + + // test STATE keyword + + case 0 : { + if (*token != STAT_word[STATW_STATE]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_STATE] , line , j + 1); + } + break; + } + + // test state index + + case 1 : { + lstatus = true; + +/* try { + index = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + index = atoi(token->c_str()); + + if ((lstatus) && (index != i)) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.correction_update(STAT_parsing[STATP_STATE_INDEX] , i , line , j + 1); + } + break; + } + + // test OCCUPANCY_DISTRIBUTION keyword + + case 2 : { + if (*token != SEQ_word[SEQW_OCCUPANCY_DISTRIBUTION]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , SEQ_word[SEQW_OCCUPANCY_DISTRIBUTION] , line , j + 1); + } + break; + } + } + + j++; + } + + if (j > 0) { + if (j != 3) { + status 
= false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + dist[i] = DiscreteParametric::parsing(error , in_file , line , UNIFORM , + cumul_threshold , 1); + if (!dist[i]) { + status = false; + } + else if (dist[i]->mean == 1.) { + delete dist[i]; + dist[i] = NULL; + } + + break; + } + } + + if ((j == 0) && (!dist[i])) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + } + + if (status) { + process = new CategoricalSequenceProcess(chain.nb_state , dist); + } + + for (i = 0;i < chain.nb_state;i++) { + delete dist[i]; + } + delete [] dist; + dist = NULL; + + return process; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the marginal state distribution for an ordinary process. + * + * \return Distribution object. + */ +/*--------------------------------------------------------------*/ + +Distribution* CategoricalSequenceProcess::weight_computation() const + +{ + int i , j; + double sum; + Distribution *weight; + + + weight = new Distribution(nb_state); + + for (i = 0;i < nb_state;i++) { + weight->mass[i] = 0.; + } + + for (i = 0;i < length->nb_value - 1;i++) { + for (j = 0;j < nb_state;j++) { + weight->mass[j] += index_value->point[j][i] * (1. 
- length->cumul[i]); + } + } + + sum = 0.; + for (i = 0;i < nb_state;i++) { + sum += weight->mass[i]; + } + for (i = 0;i < nb_state;i++) { + weight->mass[i] /= sum; + } + + weight->cumul_computation(); + weight->max_computation(); + + return weight; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/categorical_sequence_process2.cpp b/src/cpp/sequence_analysis/categorical_sequence_process2.cpp new file mode 100644 index 0000000..d410d1f --- /dev/null +++ b/src/cpp/sequence_analysis/categorical_sequence_process2.cpp @@ -0,0 +1,4019 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a CategoricalSequenceProcess object. + * + * \param[in,out] os stream, + * \param[in] process observation process index, + * \param[in] empirical_observation pointer on the observation frequency distributions, + * \param[in] marginal_distribution pointer on the marginal frequency distribution, + * \param[in] characteristics pointer on the observed sequences characteristics, + * \param[in] exhaustive flag detail level, + * \param[in] file_flag flag file, + * \param[in] forward pointer on the forward sojourn time distributions. + */ +/*--------------------------------------------------------------*/ + +ostream& CategoricalSequenceProcess::ascii_print(ostream &os , int process , + FrequencyDistribution **empirical_observation , + FrequencyDistribution *marginal_distribution , + const SequenceCharacteristics *characteristics , + bool exhaustive , bool file_flag , + Forward **forward) const + +{ + int i , j; + int buff , width[2]; + double scale[NB_STATE]; + const Distribution *pobservation[NB_STATE]; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::left , ios::adjustfield); + + if (observation) { + for (i = 0;i < nb_state;i++) { + os << "\n" << STAT_word[STATW_STATE] << " " << i << " " + << STAT_word[STATW_OBSERVATION_DISTRIBUTION] << endl; + for (j = observation[i]->offset;j < observation[i]->nb_value;j++) { + if (observation[i]->mass[j] > 0.) 
{ + os << STAT_word[STATW_OUTPUT] << " " << j << " : " << observation[i]->mass[j] << endl; + } + } + + if ((empirical_observation) && (empirical_observation[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " | " << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + + observation[i]->ascii_print(os , file_flag , false , false , empirical_observation[i]); + } + } + + // computation of the column widths + + width[0] = column_width(nb_state - 1) + ASCII_SPACE; + + width[1] = 0; + for (i = 0;i < nb_state;i++) { + buff = column_width(observation[i]->nb_value , observation[i]->mass); + if (buff > width[1]) { + width[1] = buff; + } + } + width[1] += ASCII_SPACE; + + // writing of the observation probability matrix + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_OBSERVATION_PROBABILITIY_MATRIX] << endl; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << setw(width[0]) << " "; + for (i = 0;i < nb_value;i++) { + os << setw(width[1]) << i; + } + + for (i = 0;i < nb_state;i++) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << setw(width[0]) << i; + for (j = 0;j < nb_value;j++) { + os << setw(width[1]) << observation[i]->mass[j]; + } + } + os << endl; + + if (marginal_distribution) { + double likelihood , information; + Test test(CHI2); + + + if ((weight) && (mixture)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_THEORETICAL] << " " << STAT_label[STATL_WEIGHTS] << ":"; + + for (i = 0;i < nb_state;i++) { + os << " " << weight->mass[i]; + } + os << endl; + + likelihood = mixture->likelihood_computation(*marginal_distribution); + information = marginal_distribution->information_computation(); + + os << "\n"; + if (file_flag) { + os 
<< "# "; + } + os << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << likelihood / marginal_distribution->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_MAX_LIKELIHOOD] << ": " << information << " (" + << STAT_label[STATL_INFORMATION] << ": " << information / marginal_distribution->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_DEVIANCE] << ": " << 2 * (information - likelihood) << endl; + + mixture->chi2_fit(*marginal_distribution , test); + os << "\n"; + test.ascii_print(os , file_flag); + + if (exhaustive) { + for (i = 0;i < nb_state;i++) { + pobservation[i] = observation[i]; + scale[i] = weight->mass[i] * marginal_distribution->nb_element; + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + for (i = 0;i < nb_state;i++) { + os << " | " << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] + << " " << STAT_label[STATL_DISTRIBUTION]; + } + os << " | " << STAT_label[STATL_MIXTURE] << " | " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] + << " " << STAT_label[STATL_FUNCTION] << endl; + + mixture->ascii_print(os , nb_state , pobservation , scale , + file_flag , true , marginal_distribution); + } + } + + if ((restoration_weight) && (restoration_mixture)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_RESTORATION] << " " << STAT_label[STATL_WEIGHTS] << ":"; + + for (i = 0;i < nb_state;i++) { + os << " " << restoration_weight->mass[i]; + } + os << endl; + + likelihood = restoration_mixture->likelihood_computation(*marginal_distribution); + information = marginal_distribution->information_computation(); + + os << "\n"; + if 
(file_flag) { + os << "# "; + } + os << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << likelihood / marginal_distribution->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_MAX_LIKELIHOOD] << ": " << information << " (" + << STAT_label[STATL_INFORMATION] << ": " << information / marginal_distribution->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_DEVIANCE] << ": " << 2 * (information - likelihood) << endl; + + restoration_mixture->chi2_fit(*marginal_distribution , test); + os << "\n"; + test.ascii_print(os , file_flag); + + if (exhaustive) { + for (i = 0;i < nb_state;i++) { + pobservation[i] = observation[i]; + scale[i] = restoration_weight->mass[i] * marginal_distribution->nb_element; + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + for (i = 0;i < nb_state;i++) { + os << " | " << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] + << " " << STAT_label[STATL_DISTRIBUTION]; + } + os << " | " << STAT_label[STATL_MIXTURE] << " | " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] + << " " << STAT_label[STATL_FUNCTION] << endl; + + restoration_mixture->ascii_print(os , nb_state , pobservation , scale , + file_flag , true , marginal_distribution); + } + } + } + } + + if (((index_value) || (characteristics)) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + + for (i = 0;i < nb_value;i++) { + if ((characteristics) && (i < characteristics->nb_value)) { + os << " | " << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i; + } + if (index_value) { + os << " | " << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i; + } + } + if (characteristics) { + os << " | " << STAT_label[STATL_FREQUENCY]; + } + os << endl; + + if (index_value) { + index_value->ascii_print(os , file_flag , + (characteristics ? characteristics->index_value : NULL)); + } + else { + characteristics->index_value->ascii_print(os , file_flag); + } + } + + if ((first_occurrence) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if (first_occurrence) { + if (no_occurrence[i] > 0.) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NO_OCCURRENCE] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << ": " << no_occurrence[i] << endl; + } + + if (first_occurrence[i]) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_DISTRIBUTION] << endl; + first_occurrence[i]->ascii_characteristic_print(os , false , file_flag); + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->first_occurrence[i]->ascii_characteristic_print(os , false , file_flag); + } + + if ((((first_occurrence) && (first_occurrence[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0))) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + os << " | " << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((first_occurrence) && (first_occurrence[i])) { + os << " | " << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + first_occurrence[i]->ascii_print(os , file_flag , true , true , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->first_occurrence[i]->nb_element > 0)) ? characteristics->first_occurrence[i] : NULL)); + } + + else { + os << endl; + characteristics->first_occurrence[i]->ascii_print(os , file_flag); + } + } + } + } + + if ((recurrence_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if (recurrence_time) { + if (leave[i] > 0.) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_LEAVING] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << ": " << leave[i] << endl; + } + + if (recurrence_time[i]) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + recurrence_time[i]->ascii_characteristic_print(os , false , file_flag); + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->recurrence_time[i]->ascii_characteristic_print(os , false , file_flag); + } + + if ((((recurrence_time) && (recurrence_time[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0))) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + os << " | " << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((recurrence_time) && (recurrence_time[i])) { + os << " | " << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + recurrence_time[i]->ascii_print(os , file_flag , true , true , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->recurrence_time[i]->nb_element > 0)) ? + characteristics->recurrence_time[i] : NULL)); + } + + else { + os << endl; + characteristics->recurrence_time[i]->ascii_print(os , file_flag); + } + } + } + } + + if ((sojourn_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if (sojourn_time) { + if (absorption[i] > 0.) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_ABSORPTION] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << ": " << absorption[i] << endl; + } + + if (sojourn_time[i]) { + if (sojourn_time[i]->ident != CATEGORICAL) { + os << "\n" << STAT_word[STATW_STATE] << " " << i << " " + << SEQ_word[SEQW_OCCUPANCY_DISTRIBUTION] << endl; + sojourn_time[i]->ascii_print(os); + } + + else { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + +# ifdef DEBUG + sojourn_time[i]->ascii_characteristic_print(os , (sojourn_time[i]->ident == CATEGORICAL ? false : true) , file_flag); +# endif + +// sojourn_time[i]->ascii_parametric_characteristic_print(os , true , file_flag); + sojourn_time[i]->ascii_parametric_characteristic_print(os , (sojourn_time[i]->ident == CATEGORICAL ? 
false : true) , file_flag); + +# ifdef MESSAGE + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIATION_COEFF] << ": " << (sojourn_time[i]->ident == CATEGORICAL ? sqrt(sojourn_time[i]->variance) / sojourn_time[i]->mean : sqrt(sojourn_time[i]->parametric_variance_computation()) / sojourn_time[i]->parametric_mean_computation()) << endl; +# endif + + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->sojourn_time[i]->ascii_characteristic_print(os , false , file_flag); + } + + if ((((sojourn_time) && (sojourn_time[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0))) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + os << " | " << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((sojourn_time) && (sojourn_time[i])) { + os << " | " << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + sojourn_time[i]->Distribution::ascii_print(os , file_flag , true , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->sojourn_time[i]->nb_element > 0)) ? + characteristics->sojourn_time[i] : NULL)); + } + + else { + os << endl; + characteristics->sojourn_time[i]->ascii_print(os , file_flag); + } + } + + if ((forward) && (forward[i])) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + forward[i]->ascii_characteristic_print(os , false , file_flag); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->initial_run)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->initial_run[i]->ascii_characteristic_print(os , false , file_flag); + + if ((characteristics->initial_run[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + + if ((forward) && (forward[i])) { + os << " | " << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << " | " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + forward[i]->Distribution::ascii_print(os , file_flag , true , false , + characteristics->initial_run[i]); + } + + else { + os << endl; + characteristics->initial_run[i]->ascii_print(os , file_flag); + } + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->final_run[i]->ascii_characteristic_print(os , false , file_flag); + } + + if ((((forward) && (forward[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0))) && (exhaustive)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + os << " | " << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((forward) && (forward[i])) { + os << " | " << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + forward[i]->Distribution::ascii_print(os , file_flag , true , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->final_run[i]->nb_element > 0)) ? + characteristics->final_run[i] : NULL)); + } + + else { + os << endl; + characteristics->final_run[i]->ascii_print(os , file_flag); + } + } + } + } + + if ((nb_run) || ((characteristics) && (characteristics->nb_run))) { + for (i = 0;i < nb_value;i++) { + if (nb_run) { + os << "\n"; + if (file_flag) { + os << "# "; + } + if (length->variance == 0.) { + os << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + else { + os << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS] << endl; + } + nb_run[i]->ascii_characteristic_print(os , (length->variance > 0. ? 
false : true) , file_flag); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_run)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->nb_run[i]->ascii_characteristic_print(os , (length->variance > 0. ? false : true) , file_flag); + } + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if ((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run) && + (characteristics->nb_run[i]->nb_element > 0)) { + os << " | " << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if (nb_run) { + if (length->variance == 0.) { + os << " | " << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + os << " | " << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS]; + } + if ((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run) && + (characteristics->nb_run[i]->nb_element > 0)) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + if (length->variance == 0.) 
{ + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + else { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + + nb_run[i]->ascii_print(os , file_flag , true , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) ? characteristics->nb_run[i] : NULL)); + } + + else { + os << endl; + characteristics->nb_run[i]->ascii_print(os , file_flag); + } + } + } + } + + if ((nb_occurrence) || ((characteristics) && (characteristics->nb_occurrence))) { + for (i = 0;i < nb_value;i++) { + if (nb_occurrence) { + os << "\n"; + if (file_flag) { + os << "# "; + } + if (length->variance == 0.) { + os << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + else { + os << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS] << endl; + } + nb_occurrence[i]->ascii_characteristic_print(os , (length->variance > 0. ? false : true) , file_flag); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + characteristics->nb_occurrence[i]->ascii_characteristic_print(os , (length->variance > 0. ? 
false : true) , file_flag); + } + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + os << " | " << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if (nb_occurrence) { + if (length->variance == 0.) { + os << " | " << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + os << " | " << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS]; + } + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + if (length->variance == 0.) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + else { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + + nb_occurrence[i]->ascii_print(os , file_flag , true , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_occurrence) && (characteristics->nb_occurrence[i]->nb_element > 0)) ? 
characteristics->nb_occurrence[i] : NULL)); + } + + else { + os << endl; + characteristics->nb_occurrence[i]->ascii_print(os , file_flag); + } + } + } + } + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a CategoricalSequenceProcess object at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] process observation process index, + * \param[in] empirical_observation pointer on the observation frequency distributions, + * \param[in] marginal_distribution pointer on the marginal frequency distribution, + * \param[in] characteristics pointer on the observed sequences characteristics, + * \param[in] forward pointer on the forward sojourn time distributions. + */ +/*--------------------------------------------------------------*/ + +ostream& CategoricalSequenceProcess::spreadsheet_print(ostream &os , int process , + FrequencyDistribution **empirical_observation , + FrequencyDistribution *marginal_distribution , + const SequenceCharacteristics *characteristics , + Forward **forward) const + +{ + int i , j; + double scale[NB_STATE]; + const Distribution *pobservation[NB_STATE]; + Curves *smoothed_curves; + + + if (observation) { + for (i = 0;i < nb_state;i++) { + os << "\n" << STAT_word[STATW_STATE] << " " << i << "\t" + << STAT_word[STATW_OBSERVATION_DISTRIBUTION] << endl; + for (j = observation[i]->offset;j < observation[i]->nb_value;j++) { + if (observation[i]->mass[j] > 0.) 
{ + os << STAT_word[STATW_OUTPUT] << "\t" << j << "\t" << observation[i]->mass[j] << endl; + } + } + + if ((empirical_observation) && (empirical_observation[i]->nb_element > 0)) { + os << "\n\t" << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\t" << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + + observation[i]->spreadsheet_print(os , false , false , false , empirical_observation[i]); + } + } + + if (marginal_distribution) { + double likelihood , information; + Test test(CHI2); + + + if ((weight) && (mixture)) { + os << "\n" << STAT_label[STATL_THEORETICAL] << " " << STAT_label[STATL_WEIGHTS]; + for (i = 0;i < nb_state;i++) { + os << "\t" << weight->mass[i]; + } + os << endl; + + likelihood = mixture->likelihood_computation(*marginal_distribution); + information = marginal_distribution->information_computation(); + + os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << likelihood / marginal_distribution->nb_element << endl; + os << STAT_label[STATL_MAX_LIKELIHOOD] << "\t" << information << "\t" + << STAT_label[STATL_INFORMATION] << "\t" << information / marginal_distribution->nb_element << endl; + os << STAT_label[STATL_DEVIANCE] << "\t" << 2 * (information - likelihood) << endl; + + mixture->chi2_fit(*marginal_distribution , test); + os << "\n"; + test.spreadsheet_print(os); + + for (i = 0;i < nb_state;i++) { + pobservation[i] = observation[i]; + scale[i] = weight->mass[i] * marginal_distribution->nb_element; + } + + os << "\n\t" << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + for (i = 0;i < nb_state;i++) { + os << "\t" << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] + << " " << STAT_label[STATL_DISTRIBUTION]; + } + os << "\t" << STAT_label[STATL_MIXTURE] << "\t" 
<< STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] + << " " << STAT_label[STATL_FUNCTION] << endl; + + mixture->spreadsheet_print(os , nb_state , pobservation , scale , true , + marginal_distribution); + } + + if ((restoration_weight) && (restoration_mixture)) { + os << "\n" << STAT_label[STATL_RESTORATION] << " " << STAT_label[STATL_WEIGHTS]; + for (i = 0;i < nb_state;i++) { + os << "\t" << restoration_weight->mass[i]; + } + os << endl; + + likelihood = restoration_mixture->likelihood_computation(*marginal_distribution); + information = marginal_distribution->information_computation(); + + os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << likelihood / marginal_distribution->nb_element << endl; + os << STAT_label[STATL_MAX_LIKELIHOOD] << "\t" << information << "\t" + << STAT_label[STATL_INFORMATION] << "\t" << information / marginal_distribution->nb_element << endl; + os << STAT_label[STATL_DEVIANCE] << "\t" << 2 * (information - likelihood) << endl; + + restoration_mixture->chi2_fit(*marginal_distribution , test); + os << "\n"; + test.spreadsheet_print(os); + + for (i = 0;i < nb_state;i++) { + pobservation[i] = observation[i]; + scale[i] = restoration_weight->mass[i] * marginal_distribution->nb_element; + } + + os << "\n\t" << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + for (i = 0;i < nb_state;i++) { + os << "\t" << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] + << " " << STAT_label[STATL_DISTRIBUTION]; + } + os << "\t" << STAT_label[STATL_MIXTURE] << "\t" << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] + << " " << 
STAT_label[STATL_FUNCTION] << endl; + + restoration_mixture->spreadsheet_print(os , nb_state , pobservation , scale , true , + marginal_distribution); + } + } + } + + if ((index_value) || (characteristics)) { + os << "\n"; + for (i = 0;i < nb_value;i++) { + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i; + } + if (index_value) { + os << "\t" << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i; + } + } + if (characteristics) { + os << "\t" << STAT_label[STATL_FREQUENCY]; + } + os << endl; + + if (index_value) { + index_value->spreadsheet_print(os , (characteristics ? characteristics->index_value : NULL)); + } + else { + characteristics->index_value->spreadsheet_print(os); + } + + if (characteristics) { + smoothed_curves = new Curves(*(characteristics->index_value) , SMOOTHING); + + os << "\n" << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES] << endl; + for (i = 0;i < nb_value;i++) { + if (i < characteristics->nb_value) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i; + } + if (index_value) { + os << "\t" << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i; + } + } + os << "\t" << STAT_label[STATL_FREQUENCY] << endl; + + if (index_value) { + index_value->spreadsheet_print(os , smoothed_curves); + } + else { + smoothed_curves->spreadsheet_print(os); + } + + delete smoothed_curves; + } + } + + if ((first_occurrence) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if (first_occurrence) { + if (no_occurrence[i] > 0.) { + os << "\n" << SEQ_label[SEQL_NO_OCCURRENCE] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << ": " << no_occurrence[i] << endl; + } + + if (first_occurrence[i]) { + os << "\n" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_DISTRIBUTION] << endl; + first_occurrence[i]->spreadsheet_characteristic_print(os); + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->first_occurrence[i]->spreadsheet_characteristic_print(os); + } + + if (((first_occurrence) && (first_occurrence[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0))) { + os << "\n"; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + os << "\t" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((first_occurrence) && (first_occurrence[i])) { + os << "\t" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + first_occurrence[i]->spreadsheet_print(os , true , false , true , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->first_occurrence[i]->nb_element > 0)) ? 
characteristics->first_occurrence[i] : NULL)); + } + + else { + os << endl; + characteristics->first_occurrence[i]->spreadsheet_print(os); + } + } + } + } + + if ((recurrence_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if (recurrence_time) { + if (leave[i] > 0.) { + os << "\n" << SEQ_label[SEQL_LEAVING] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << ": " << leave[i] << endl; + } + + if (recurrence_time[i]) { + os << "\n" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + recurrence_time[i]->spreadsheet_characteristic_print(os); + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->recurrence_time[i]->spreadsheet_characteristic_print(os); + } + + if (((recurrence_time) && (recurrence_time[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0))) { + os << "\n"; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + os << "\t" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((recurrence_time) && (recurrence_time[i])) { + os << "\t" << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + recurrence_time[i]->spreadsheet_print(os , true , false , true , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->recurrence_time[i]->nb_element > 0)) ? + characteristics->recurrence_time[i] : NULL)); + } + + else { + os << endl; + characteristics->recurrence_time[i]->spreadsheet_print(os); + } + } + } + } + + if ((sojourn_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if (sojourn_time) { + if (absorption[i] > 0.) { + os << "\n" << SEQ_label[SEQL_ABSORPTION] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << ": " << absorption[i] << endl; + } + + if (sojourn_time[i]) { + if (sojourn_time[i]->ident != CATEGORICAL) { + os << "\n" << STAT_word[STATW_STATE] << " " << i << "\t" + << SEQ_word[SEQW_OCCUPANCY_DISTRIBUTION] << endl; + sojourn_time[i]->spreadsheet_print(os); + } + else { + os << "\n" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + + sojourn_time[i]->spreadsheet_parametric_characteristic_print(os , (sojourn_time[i]->ident == CATEGORICAL ? false : true)); + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n" << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->sojourn_time[i]->spreadsheet_characteristic_print(os); + } + + if (((sojourn_time) && (sojourn_time[i])) || + ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0))) { + os << "\n"; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + os << "\t" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((sojourn_time) && (sojourn_time[i])) { + os << "\t" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + sojourn_time[i]->Distribution::spreadsheet_print(os , true , false , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->sojourn_time[i]->nb_element > 0)) ? 
+ characteristics->sojourn_time[i] : NULL)); + } + + else { + os << endl; + characteristics->sojourn_time[i]->spreadsheet_print(os); + } + } + + if ((forward) && (forward[i])) { + os << "\n" << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + forward[i]->spreadsheet_characteristic_print(os); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->initial_run)) { + os << "\n" << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->initial_run[i]->spreadsheet_characteristic_print(os); + + if (characteristics->initial_run[i]->nb_element > 0) { + os << "\n\t" << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + + if ((forward) && (forward[i])) { + os << "\t" << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << "\t" << STAT_label[STATL_CUMULATIVE] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + forward[i]->Distribution::spreadsheet_print(os , true , false , false , + characteristics->initial_run[i]); + } + + else { + os << endl; + characteristics->initial_run[i]->spreadsheet_print(os); + } + } + } + + if ((characteristics) && (i < characteristics->nb_value)) { + os << "\n" << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->final_run[i]->spreadsheet_characteristic_print(os); + } + + if (((forward) && (forward[i])) || ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0))) { + os << "\n"; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + os << "\t" << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if ((forward) && (forward[i])) { + os << "\t" << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + forward[i]->Distribution::spreadsheet_print(os , true , false , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->final_run[i]->nb_element > 0)) ? + characteristics->final_run[i] : NULL)); + + } + + else { + os << endl; + characteristics->final_run[i]->spreadsheet_print(os); + } + } + } + } + + if ((nb_run) || ((characteristics) && (characteristics->nb_run))) { + for (i = 0;i < nb_value;i++) { + if (nb_run) { + if (length->variance == 0.) { + os << "\n" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + else { + os << "\n" << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS] << endl; + } + nb_run[i]->spreadsheet_characteristic_print(os , (length->variance > 0. ? false : true)); + } + + if ((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run)) { + os << "\n" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->nb_run[i]->spreadsheet_characteristic_print(os , (length->variance > 0. ? false : true)); + } + + os << "\n"; + if ((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run) && + (characteristics->nb_run[i]->nb_element > 0)) { + os << "\t" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if (nb_run) { + if (length->variance == 0.) { + os << "\t" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + os << "\t" << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS]; + } + if ((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run) && + (characteristics->nb_run[i]->nb_element > 0)) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + if (length->variance == 0.) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + else { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + + nb_run[i]->spreadsheet_print(os , true , false , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) ? characteristics->nb_run[i] : NULL)); + } + + else { + os << endl; + characteristics->nb_run[i]->spreadsheet_print(os); + } + } + } + + if ((nb_occurrence) || ((characteristics) && (characteristics->nb_occurrence))) { + for (i = 0;i < nb_value;i++) { + if (nb_occurrence) { + if (length->variance == 0.) { + os << "\n" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + else { + os << "\n" << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS] << endl; + } + nb_occurrence[i]->spreadsheet_characteristic_print(os , (length->variance > 0. ? 
false : true)); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence)) { + os << "\n" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + characteristics->nb_occurrence[i]->spreadsheet_characteristic_print(os , (length->variance > 0. ? false : true)); + } + + os << "\n"; + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + os << "\t" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + + if (nb_occurrence) { + if (length->variance == 0.) { + os << "\t" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + os << "\t" << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTIONS]; + } + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + if (length->variance == 0.) 
{ + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + else { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] << " " + << STAT_label[STATL_FUNCTION] << endl; + } + + nb_occurrence[i]->spreadsheet_print(os , true , false , false , + (((characteristics) && (i < characteristics->nb_value) && (characteristics->nb_occurrence) && (characteristics->nb_occurrence[i]->nb_element > 0)) ? characteristics->nb_occurrence[i] : NULL)); + } + + else { + os << endl; + characteristics->nb_occurrence[i]->spreadsheet_print(os); + } + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a CategoricalSequenceProcess object using Gnuplot. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] process observation process index, + * \param[in] empirical_observation pointer to the observation frequency distributions, + * \param[in] marginal_distribution pointer to the marginal frequency distribution, + * \param[in] characteristics pointer to the observed sequence characteristics, + * \param[in] length_distribution pointer to the sequence length frequency distribution, + * \param[in] forward pointer to the forward sojourn time distributions. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool CategoricalSequenceProcess::plot_print(const char *prefix , const char *title , int process , + FrequencyDistribution **empirical_observation , + FrequencyDistribution *marginal_distribution , + const SequenceCharacteristics *characteristics , + const FrequencyDistribution *length_distribution , + Forward **forward) const + +{ + bool status = false , start; + int i , j , k , m; + int index_length , nb_histo , nb_dist , histo_index , dist_index , *dist_nb_value; + double *scale; + Curves *smoothed_curves; + const Distribution **pdist; + const FrequencyDistribution **phisto; + ostringstream data_file_name[2]; + + + // writing of the data files + + if ((index_value) || (characteristics)) { + if (characteristics) { + index_length = characteristics->index_value->plot_length_computation(); + if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + smoothed_curves = new Curves(*(characteristics->index_value) , SMOOTHING); + } + else { + smoothed_curves = NULL; + } + } + + data_file_name[0] << prefix << process << 0 << ".dat"; + + if (index_value) { + if (characteristics) { + status = index_value->plot_print((data_file_name[0].str()).c_str() , index_length , + characteristics->index_value , smoothed_curves); + } + else { + status = index_value->plot_print((data_file_name[0].str()).c_str()); + } + } + + else { + status = characteristics->index_value->plot_print((data_file_name[0].str()).c_str() , + index_length , smoothed_curves); + } + + if (characteristics) { + delete smoothed_curves; + } + } + + if (status) { + pdist = new const Distribution*[6 * nb_value + 3 * nb_state + 2]; + dist_nb_value = new int[6 * nb_value + 3 * nb_state + 2]; + scale = new double[6 * nb_value + 3 * nb_state + 2]; + phisto = new const FrequencyDistribution*[7 * nb_value + nb_state + 3]; + + nb_histo = 0; + nb_dist = 0; + + if (length_distribution) { + phisto[nb_histo++] = 
length_distribution; + } + + if ((first_occurrence) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if ((first_occurrence) && (first_occurrence[i])) { + pdist[nb_dist] = first_occurrence[i]; + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + phisto[nb_histo] = characteristics->first_occurrence[i]; + dist_nb_value[nb_dist] = MIN(first_occurrence[i]->nb_value , phisto[nb_histo]->nb_value * 3); + scale[nb_dist++] = phisto[nb_histo++]->nb_element / + (1. - first_occurrence[i]->complement); +// first_occurrence[i]->cumul[first_occurrence[i]->nb_value - 1]; + } + else { + dist_nb_value[nb_dist] = first_occurrence[i]->nb_value; + scale[nb_dist++] = 1.; + } + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + phisto[nb_histo++] = characteristics->first_occurrence[i]; + } + } + } + + if ((recurrence_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if ((recurrence_time) && (recurrence_time[i])) { + pdist[nb_dist] = recurrence_time[i]; + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + phisto[nb_histo] = characteristics->recurrence_time[i]; + dist_nb_value[nb_dist] = MIN(recurrence_time[i]->nb_value , phisto[nb_histo]->nb_value * 3); + scale[nb_dist++] = phisto[nb_histo++]->nb_element / + (1. 
- recurrence_time[i]->complement); + } + else { + dist_nb_value[nb_dist] = recurrence_time[i]->nb_value; + scale[nb_dist++] = 1.; + } + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + phisto[nb_histo++] = characteristics->recurrence_time[i]; + } + } + } + + if ((sojourn_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if ((sojourn_time) && (sojourn_time[i])) { + pdist[nb_dist] = sojourn_time[i]; + dist_nb_value[nb_dist] = sojourn_time[i]->nb_value; + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + phisto[nb_histo] = characteristics->sojourn_time[i]; + if (sojourn_time[i]->cumul[sojourn_time[i]->nb_value - 1] < CUMUL_THRESHOLD) { + scale[nb_dist++] = phisto[nb_histo++]->nb_element / + (1. - sojourn_time[i]->complement); +// sojourn_time[i]->cumul[sojourn_time[i]->nb_value - 1]; + } + else { + scale[nb_dist++] = phisto[nb_histo++]->nb_element; + } + } + else { + scale[nb_dist++] = 1.; + } + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + phisto[nb_histo++] = characteristics->sojourn_time[i]; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->initial_run) && + (characteristics->initial_run[i]->nb_element > 0)) { + if ((forward) && (forward[i])) { + pdist[nb_dist] = forward[i]; + dist_nb_value[nb_dist] = forward[i]->nb_value; + phisto[nb_histo] = characteristics->initial_run[i]; + scale[nb_dist++] = phisto[nb_histo++]->nb_element; + } + + else { + phisto[nb_histo++] = characteristics->initial_run[i]; + } + } + + if ((forward) && (forward[i])) { + pdist[nb_dist] = forward[i]; + dist_nb_value[nb_dist] = forward[i]->nb_value; + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + phisto[nb_histo] = 
characteristics->final_run[i]; + scale[nb_dist++] = phisto[nb_histo++]->nb_element; + } + else { + scale[nb_dist++] = 1.; + } + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + phisto[nb_histo++] = characteristics->final_run[i]; + } + } + } + + if ((nb_run) || (nb_occurrence) || + ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) { + for (i = 0;i < nb_value;i++) { + if (nb_run) { + pdist[nb_dist] = nb_run[i]; + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) { + phisto[nb_histo] = characteristics->nb_run[i]; + dist_nb_value[nb_dist] = nb_run[i]->plot_nb_value_computation(phisto[nb_histo]); + scale[nb_dist++] = phisto[nb_histo++]->nb_element; + } + else { + dist_nb_value[nb_dist] = nb_run[i]->plot_nb_value_computation(); + scale[nb_dist++] = 1.; + } + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) { + phisto[nb_histo++] = characteristics->nb_run[i]; + } + + if (nb_occurrence) { + pdist[nb_dist] = nb_occurrence[i]; + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + phisto[nb_histo] = characteristics->nb_occurrence[i]; + dist_nb_value[nb_dist] = nb_occurrence[i]->plot_nb_value_computation(phisto[nb_histo]); + scale[nb_dist++] = phisto[nb_histo++]->nb_element; + } + else { + dist_nb_value[nb_dist] = nb_occurrence[i]->plot_nb_value_computation(); + scale[nb_dist++] = 1.; + } + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + phisto[nb_histo++] = characteristics->nb_occurrence[i]; + } + } + } + + if (observation) { + for (i = 0;i < nb_state;i++) { 
+ pdist[nb_dist] = observation[i]; + dist_nb_value[nb_dist] = observation[i]->nb_value; + + if ((empirical_observation) && (empirical_observation[i]->nb_element > 0)) { + phisto[nb_histo++] = empirical_observation[i]; + scale[nb_dist++] = empirical_observation[i]->nb_element; + } + else { + scale[nb_dist++] = 1.; + } + } + + if (marginal_distribution) { + if ((weight) && (mixture)) { + for (i = 0;i < nb_state;i++) { + pdist[nb_dist] = observation[i]; + dist_nb_value[nb_dist] = observation[i]->nb_value; + scale[nb_dist++] = weight->mass[i] * marginal_distribution->nb_element; + } + + pdist[nb_dist] = mixture; + dist_nb_value[nb_dist] = mixture->nb_value; + phisto[nb_histo++] = marginal_distribution; + scale[nb_dist++] = marginal_distribution->nb_element; + } + + if ((restoration_weight) && (restoration_mixture)) { + for (i = 0;i < nb_state;i++) { + pdist[nb_dist] = observation[i]; + dist_nb_value[nb_dist] = observation[i]->nb_value; + scale[nb_dist++] = restoration_weight->mass[i] * marginal_distribution->nb_element; + } + + pdist[nb_dist] = restoration_mixture; + dist_nb_value[nb_dist] = restoration_mixture->nb_value; + phisto[nb_histo++] = marginal_distribution; + scale[nb_dist++] = marginal_distribution->nb_element; + } + } + } + + data_file_name[1] << prefix << process << 1 << ".dat"; + status = ::plot_print((data_file_name[1].str()).c_str() , nb_dist , pdist , scale , + dist_nb_value , nb_histo , phisto); + + if (status) { + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process << 1 << ".plot"; + break; + case 1 : + file_name[0] << prefix << process << 1 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << 1 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" 
<< "set tics out\n" << "set xtics nomirror\n"; + + if (characteristics) { + if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process << " - "; + } + out_file << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES] << "\"\n\n"; + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << index_length - 1 << "] [0:1] "; + + j = 0; + for (k = 0;k < nb_value;k++) { + if (k < characteristics->nb_value) { + j++; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << (index_value ? nb_value : 0) + characteristics->nb_value + k + 1 + << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " + << k << "\" with linespoints"; + } + if (index_value) { + j++; + if (k < characteristics->nb_value) { + out_file << ",\\" << endl; + } + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << k + 1 << " title \"" << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " + << k << "\" with linespoints"; + } + + if ((j == PLOT_NB_CURVE) && (k < nb_value - 1)) { + out_file << endl; + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << "\nplot [0:" << index_length - 1 << "] [0:1] "; + } + + else { + if (k < nb_value - 1) { + out_file << ",\\"; + } + out_file << endl; + } + } + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + out_file << "set title"; + if ((title) || (process > 0)) { + out_file << " \""; + if (title) { + out_file << title; + if (process > 0) { + out_file << " - "; + } + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + out_file << "\""; + } + out_file << "\n\n"; + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << index_length - 1 << "] [0:1] "; + + j = 0; + for (k = 0;k < nb_value;k++) { + if (k < characteristics->nb_value) { + j++; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << (index_value ? nb_value : 0) + k + 1 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " + << k << "\" with linespoints"; + } + if (index_value) { + j++; + if (k < characteristics->nb_value) { + out_file << ",\\" << endl; + } + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << k + 1 << " title \"" << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " + << k << "\" with linespoints"; + } + + if ((j == PLOT_NB_CURVE) && (k < nb_value - 1)) { + out_file << endl; + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << "\nplot [0:" << index_length - 1 << "] [0:1] "; + } + + else { + if (k < nb_value - 1) { + out_file << ",\\"; + } + out_file << endl; + } + } + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_distribution->nb_value - 1 << "] [0:" + << (int)(length_distribution->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using 1 title \"" + << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + else { + out_file << "set title"; + if ((title) || (process > 0)) { + out_file << " \""; + if (title) { + out_file << title; + if (process > 0) { + out_file << " - "; + } + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + out_file << "\""; + } + out_file << "\n\n"; + + if (index_value->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << index_value->length - 1 << "] [0:1] "; + + for (j = 0;j < nb_value;j++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << j + 1 << " 
title \"" << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " + << j << "\" with linespoints"; + if (j < nb_value - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (index_value->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = 1; + dist_index = 0; + + if ((first_occurrence) || (characteristics)) { + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process << 2 << ".plot"; + break; + case 1 : + file_name[0] << prefix << process << 2 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << 2 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (process > 0)) { + out_file << " \""; + if (title) { + out_file << title; + if (process > 0) { + out_file << " - "; + } + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + k = dist_index; + + start = true; + for (m = 0;m < nb_value;m++) { + if ((first_occurrence) && (first_occurrence[m])) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (MAX(dist_nb_value[k] , 2) - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->first_occurrence[m]->nb_element > 0)) { + out_file << "plot [0:" << MAX(dist_nb_value[k] , 2) 
- 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << MAX(dist_nb_value[k] , 2) - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints" << endl; + } + + if (MAX(dist_nb_value[k] , 2) - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->first_occurrence[m]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (MAX(phisto[j]->nb_value , 2) - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << MAX(phisto[j]->nb_value , 2) - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 
0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (MAX(phisto[j]->nb_value , 2) - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + j++; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + dist_index = k; + } + + if ((recurrence_time) || (characteristics)) { + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process << 3 << ".plot"; + break; + case 1 : + file_name[0] << prefix << process << 3 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << 3 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (process > 0)) { + out_file << " \""; + if (title) { + out_file << title; + if (process > 0) { + out_file << " - "; + } + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + k = dist_index; + + start = true; + for (m = 0;m < nb_value;m++) { + if ((recurrence_time) && (recurrence_time[m])) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->recurrence_time[m]->nb_element > 0)) { + out_file << "plot [0:" << 
dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << m + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << m + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << m + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->recurrence_time[m]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << phisto[j]->nb_value - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << m + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + j++; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + dist_index = k; + } + + if ((sojourn_time) || (characteristics)) { + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process << 4 << ".plot"; + break; + case 1 : + file_name[0] << prefix << process << 4 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << 4 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (process > 0)) { + out_file << " \""; + if (title) { + out_file << title; + if (process > 0) { + out_file << " - "; + } + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + k = dist_index; + + start = true; + for (m = 0;m < nb_value;m++) { + if ((sojourn_time) && (sojourn_time[m])) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->sojourn_time[m]->nb_element > 0)) { + 
out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << m + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << m + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + sojourn_time[m]->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << m + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + sojourn_time[m]->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->sojourn_time[m]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << phisto[j]->nb_value - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << m + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + j++; + } + + if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->initial_run) && + (characteristics->initial_run[m]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if ((forward) && (forward[m])) { + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_INITIAL_RUN] << " - " << STAT_label[STATL_STATE] + << " " << m << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " << STAT_label[STATL_FORWARD] + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else { + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << phisto[j]->nb_value - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) 
+ 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << m + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + j++; + } + + if ((forward) && (forward[m])) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->final_run[m]->nb_element > 0)) { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_FINAL_RUN] << " - " << STAT_label[STATL_STATE] + << " " << m << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " << STAT_label[STATL_FORWARD] + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) 
<< "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " << STAT_label[STATL_FORWARD] + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->final_run[m]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << phisto[j]->nb_value - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << m + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + j++; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + dist_index = k; + } + + if ((nb_run) || (nb_occurrence) || + ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) { + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process << 5 << ".plot"; + break; + case 1 : + file_name[0] << prefix << process << 5 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << 5 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (process > 0)) { + out_file << " \""; + if (title) { + out_file << title; + if (process > 0) { + out_file << " - "; + } + } + if (process > 0) { + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + k = dist_index; + + start = true; + for (m = 0;m < nb_value;m++) { + if (nb_run) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((characteristics) && (m < characteristics->nb_value) && + 
(characteristics->nb_run) && + (characteristics->nb_run[m]->nb_element > 0)) { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1; + if (length->variance == 0.) { + out_file << " title \"" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + out_file << " title \"" << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + out_file << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->nb_run) && + (characteristics->nb_run[m]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << phisto[j]->nb_value - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + j++; + } + + if (nb_occurrence) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[m]->nb_element > 0)) { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1; + if (length->variance == 0.) { + out_file << " title \"" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + out_file << " title \"" << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + out_file << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + } + + else if ((characteristics) && (m < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[m]->nb_element > 0)) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << phisto[j]->nb_value - 1 << "] [0:" + << (int)(phisto[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << m << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (phisto[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(phisto[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + j++; + } + } + + if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_distribution->nb_value - 1 << "] [0:" + << (int)(length_distribution->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using 1 title \"" + << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + dist_index = k; + } + + if (observation) { + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process << 0 << ".plot"; + break; + case 1 : + file_name[0] << prefix << process << 0 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << 0 << ".ps"; + out_file << 
"set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if (title) { + out_file << " \"" << title << " - " << STAT_label[STATL_OUTPUT_PROCESS] + << " " << process << "\""; + } + out_file << "\n\n"; + + j = histo_index; + k = dist_index; + + for (m = 0;m < nb_state;m++) { + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if ((empirical_observation) && (empirical_observation[m]->nb_element > 0)) { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << (int)(MAX(phisto[j]->max , pdist[k]->max * scale[k]) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + j++; + } + + else { + out_file << "plot [0:" << dist_nb_value[k] - 1 << "] [0:" + << MIN(pdist[k]->max * YSCALE , 1.) 
<< "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints" << endl; + } + + if (dist_nb_value[k] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + k++; + + if ((i == 0) && (m < nb_state - 1)) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + } + + if (marginal_distribution) { + if ((weight) && (mixture)) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process << " - " + << STAT_label[STATL_THEORETICAL] << " " << STAT_label[STATL_WEIGHTS] << "\"\n\n"; + + if (nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << nb_value - 1 << "] [0:" + << (int)(MAX(marginal_distribution->max , mixture->max * marginal_distribution->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[STATL_MARGINAL] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses,\\" << endl; + j++; + + for (m = 0;m < nb_state;m++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " << STAT_label[STATL_OBSERVATION] + << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints,\\" << endl; + k++; + } + + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_MIXTURE] << "\" with linespoints" << endl; + k++; + + if (nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if 
((restoration_weight) && (restoration_mixture)) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process << " - " + << STAT_label[STATL_RESTORATION] << " " << STAT_label[STATL_WEIGHTS] << "\"\n\n"; + + if (nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << nb_value - 1 << "] [0:" + << (int)(MAX(marginal_distribution->max , restoration_mixture->max * marginal_distribution->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[1].str()).c_str()) << "\" using " << j + 1 + << " title \"" << STAT_label[STATL_MARGINAL] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses,\\" << endl; + j++; + + for (m = 0;m < nb_state;m++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_STATE] << " " << m << " " << STAT_label[STATL_OBSERVATION] + << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints,\\" << endl; + k++; + } + + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " << nb_histo + k + 1 + << " title \"" << STAT_label[STATL_MIXTURE] << "\" with linespoints" << endl; + k++; + + if (nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + delete [] pdist; + delete [] dist_nb_value; + delete [] scale; + delete [] phisto; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a CategoricalSequenceProcess object. 
+ * + * \param[in] plot reference on a MultiPlotSet object, + * \param[in] index MultiPlot index, + * \param[in] process observation process index, + * \param[in] empirical_observation pointer on the observation frequency distributions, + * \param[in] marginal_distribution pointer on the marginal frequency distribution, + * \param[in] characteristics pointer on the observed sequences characteristics, + * \param[in] length_distribution pointer on the sequence length frequency distribution, + * \param[in] forward pointer on the forward sojourn time distributions. + */ +/*--------------------------------------------------------------*/ + +void CategoricalSequenceProcess::plotable_write(MultiPlotSet &plot , int &index , int process , + FrequencyDistribution **empirical_observation , + FrequencyDistribution *marginal_distribution , + const SequenceCharacteristics *characteristics , + const FrequencyDistribution *length_distribution , + Forward **forward) const + +{ + int i , j; + int index_length = 0 , dist_nb_value = 0; + double scale , max; + Curves *smoothed_curves; + ostringstream title , legend; + + // Order of plots: + // + intensity (one per variable) + // + + if ((index_value) || (characteristics)) { + plot.variable_nb_viewpoint[process]++; + } + if ((first_occurrence) || (characteristics)) { + plot.variable_nb_viewpoint[process]++; + } + if ((recurrence_time) || (characteristics)) { + plot.variable_nb_viewpoint[process]++; + } + if ((sojourn_time) || (characteristics)) { + plot.variable_nb_viewpoint[process]++; + } + if ((nb_run) || (nb_occurrence) || + ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) { + plot.variable_nb_viewpoint[process]++; + } + + if (characteristics) { + index_length = characteristics->index_value->plot_length_computation(); + + if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + + // fit of smoothed intensities + + plot.variable[index] = process; + plot.viewpoint[index] = 
INTENSITY; + + smoothed_curves = new Curves(*(characteristics->index_value) , SMOOTHING); + + title.str(""); + if (process > 0) { + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process << " - "; + } + title << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES]; + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , index_length - 1); + if (index_length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].yrange = Range(0. , 1.); + + plot[index].resize(index_value ? nb_value * 2 : nb_value); + + i = 0; + for (j = 0;j < nb_value;j++) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + + smoothed_curves->plotable_write(j , plot[index][i]); + i++; + + if (index_value) { + legend.str(""); + legend << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + + index_value->plotable_write(j , plot[index][i]); + i++; + } + } + + delete smoothed_curves; + index++; + } + + // fit of intensities + + plot.variable[index] = process; + plot.viewpoint[index] = INTENSITY; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , index_length - 1); + if (index_length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].yrange = Range(0. , 1.); + + plot[index].resize(index_value ? nb_value * 2 : nb_value); + + i = 0; + for (j = 0;j < nb_value;j++) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + + characteristics->index_value->plotable_write(j , plot[index][i]); + i++; + + if (index_value) { + legend.str(""); + legend << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + + index_value->plotable_write(j , plot[index][i]); + i++; + } + } + index++; + + // sequence length frequency distribution + + plot.variable[index] = process; + plot.viewpoint[index] = INTENSITY; + + plot[index].xrange = Range(0 , length_distribution->nb_value - 1); + plot[index].yrange = Range(0 , ceil(length_distribution->max * YSCALE)); + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(length_distribution->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + length_distribution->plotable_frequency_write(plot[index][0]); + index++; + } + + else { + + // theoretical intensity + index_length = index_value->length; + plot.variable[index] = process; + plot.viewpoint[index] = INTENSITY; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , index_length - 1); + if (index_length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].yrange = Range(0. , 1.); + + plot[index].resize(nb_value); + + for (i = 0;i < nb_value;i++) { + legend.str(""); + legend << SEQ_label[SEQL_THEORETICAL] << " " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + + index_value->plotable_write(i , plot[index][i]); + } + index++; + } + + if ((first_occurrence) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if ((first_occurrence) && (first_occurrence[i])) { + + // fit of the distribution of the time to the 1st occurrence of a state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = FIRST_OCCURRENCE; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , MAX(first_occurrence[i]->nb_value , 2) - 1); + if (MAX(first_occurrence[i]->nb_value , 2) - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + scale = characteristics->first_occurrence[i]->nb_element / + (1. - first_occurrence[i]->complement); + plot[index].yrange = Range(0 , ceil(MAX(characteristics->first_occurrence[i]->max , + first_occurrence[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->first_occurrence[i]->plotable_frequency_write(plot[index][0]); + j = 1; + } + + else { + scale = 1.; + plot[index].yrange = Range(0. , MIN(first_occurrence[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + legend.str(""); + legend << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + first_occurrence[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + + // frequency distribution of the time to the 1st occurrence of a state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = FIRST_OCCURRENCE; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , MAX(characteristics->first_occurrence[i]->nb_value , 2) - 1); + plot[index].yrange = Range(0 , ceil(characteristics->first_occurrence[i]->max * YSCALE)); + + if (MAX(characteristics->first_occurrence[i]->nb_value , 2) - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->first_occurrence[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->first_occurrence[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + } + + if ((recurrence_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if ((recurrence_time) && (recurrence_time[i])) { + + // fit of the distribution of the recurrence time in a state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = RECURRENCE_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , recurrence_time[i]->nb_value - 1); + if (recurrence_time[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + scale = characteristics->recurrence_time[i]->nb_element / + (1. - recurrence_time[i]->complement); + plot[index].yrange = Range(0 , ceil(MAX(characteristics->recurrence_time[i]->max , + recurrence_time[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->recurrence_time[i]->plotable_frequency_write(plot[index][0]); + j = 1; + } + + else { + scale = 1.; + plot[index].yrange = Range(0. , MIN(recurrence_time[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + legend.str(""); + legend << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + recurrence_time[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + + // frequency distribution of the recurrence time in a state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = RECURRENCE_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , characteristics->recurrence_time[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(characteristics->recurrence_time[i]->max * YSCALE)); + + if (characteristics->recurrence_time[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->recurrence_time[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->recurrence_time[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + } + + if ((sojourn_time) || (characteristics)) { + for (i = 0;i < nb_value;i++) { + if ((sojourn_time) && (sojourn_time[i])) { + + // fit of the distribution of the sojourn time in a state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = SOJOURN_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , sojourn_time[i]->nb_value - 1); + if (sojourn_time[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + scale = characteristics->sojourn_time[i]->nb_element / + (1. - sojourn_time[i]->complement); + plot[index].yrange = Range(0 , ceil(MAX(characteristics->sojourn_time[i]->max , + sojourn_time[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->sojourn_time[i]->plotable_frequency_write(plot[index][0]); + j = 1; + } + + else { + scale = 1.; + plot[index].yrange = Range(0. , MIN(sojourn_time[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + legend.str(""); + legend << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + sojourn_time[i]->plot_title_print(legend); + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + sojourn_time[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[i]->nb_element > 0)) { + + // frequency distribution of the sojourn time in a state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = SOJOURN_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , characteristics->sojourn_time[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(characteristics->sojourn_time[i]->max * YSCALE)); + + if (characteristics->sojourn_time[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->sojourn_time[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->sojourn_time[i]->plotable_frequency_write(plot[index][0]); + index++; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->initial_run) && + (characteristics->initial_run[i]->nb_element > 0)) { + if ((forward) && (forward[i])) { + + // fit of the distribution of the sojourn time in the first visited state + + plot.variable[index] = process; + plot.viewpoint[index] = SOJOURN_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , forward[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(MAX(characteristics->initial_run[i]->max , + forward[i]->max * characteristics->initial_run[i]->nb_element) * YSCALE)); + + if (forward[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->initial_run[i]->plotable_frequency_write(plot[index][0]); + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][1].legend = legend.str(); + + plot[index][1].style = "linespoints"; + + forward[i]->plotable_mass_write(plot[index][1] , characteristics->initial_run[i]->nb_element); + index++; + } + + else { + + // frequency distribution of the sojourn time in the first visited state + + plot.variable[index] = process; + 
plot.viewpoint[index] = SOJOURN_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , characteristics->initial_run[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(characteristics->initial_run[i]->max * YSCALE)); + + if (characteristics->initial_run[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->initial_run[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->initial_run[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + + if ((forward) && (forward[i])) { + + // fit of the distribution of the sojourn time in the last visited state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = SOJOURN_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , forward[i]->nb_value - 1); + if (forward[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + scale = characteristics->final_run[i]->nb_element; + plot[index].yrange = Range(0 , ceil(MAX(characteristics->final_run[i]->max , + forward[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; 
+ plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->final_run[i]->plotable_frequency_write(plot[index][0]); + j = 1; + + } + + else { + scale = 1.; + plot[index].yrange = Range(0. , MIN(forward[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_FORWARD] << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + forward[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + + // frequency distribution of the sojourn time in the last visited state/observation + + plot.variable[index] = process; + plot.viewpoint[index] = SOJOURN_TIME; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , characteristics->final_run[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(characteristics->final_run[i]->max * YSCALE)); + + if (characteristics->final_run[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->final_run[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->final_run[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + } + + if ((nb_run) || (nb_occurrence) || + ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) { + for (i = 0;i < nb_value;i++) { + if (nb_run) { + + // fit of the distribution of the number of runs of a state/observation per sequence + + plot.variable[index] = process; + plot.viewpoint[index] = COUNTING; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_run) && + (characteristics->nb_run[i]->nb_element > 0)) { + dist_nb_value = nb_run[i]->plot_nb_value_computation(characteristics->nb_run[i]); + scale = characteristics->nb_run[i]->nb_element; + + plot[index].yrange = Range(0 , ceil(MAX(characteristics->nb_run[i]->max , + nb_run[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->nb_run[i]->plotable_frequency_write(plot[index][0]); + j = 1; + } + + else { + dist_nb_value = nb_run[i]->plot_nb_value_computation(); + scale = 1.; + + plot[index].yrange = Range(0. , MIN(nb_run[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + plot[index].xrange = Range(0 , dist_nb_value); + if (dist_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + legend.str(""); + if (length->variance == 0.) 
{ + legend << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + legend << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + nb_run[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_run) && + (characteristics->nb_run[i]->nb_element > 0)) { + + // frequency distribution of the number of runs of a state/observation per sequence + + plot.variable[index] = process; + plot.viewpoint[index] = COUNTING; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , MAX(characteristics->nb_run[i]->nb_value , 2) - 1); + plot[index].yrange = Range(0 , ceil(characteristics->nb_run[i]->max * YSCALE)); + + if (MAX(characteristics->nb_run[i]->nb_value , 2) - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->nb_run[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->nb_run[i]->plotable_frequency_write(plot[index][0]); + index++; + } + + if (nb_occurrence) { + + // fit of the distribution of the number of occurrences of a state/observation per sequence + + plot.variable[index] = process; + plot.viewpoint[index] = COUNTING; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + dist_nb_value = nb_occurrence[i]->plot_nb_value_computation(characteristics->nb_occurrence[i]); + scale = characteristics->nb_occurrence[i]->nb_element; + + plot[index].yrange = Range(0 , ceil(MAX(characteristics->nb_occurrence[i]->max , + nb_occurrence[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->nb_occurrence[i]->plotable_frequency_write(plot[index][0]); + j = 1; + } + + else { + dist_nb_value = nb_occurrence[i]->plot_nb_value_computation(); + scale = 1.; + + plot[index].yrange = Range(0. , MIN(nb_occurrence[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + plot[index].xrange = Range(0 , dist_nb_value); + if (dist_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + legend.str(""); + if (length->variance == 0.) { + legend << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_LENGTH] << " " << length->offset << " " + << SEQ_label[SEQL_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + else { + legend << SEQ_label[SEQL_MIXTURE_OF] << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[process == 0 ? STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_DISTRIBUTION]; + } + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + nb_occurrence[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + + // frequency distribution of the number of occurrences of a state/observation per sequence + + plot.variable[index] = process; + plot.viewpoint[index] = COUNTING; + + if (process > 0) { + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , MAX(characteristics->nb_occurrence[i]->nb_value , 2) - 1); + plot[index].yrange = Range(0 , ceil(characteristics->nb_occurrence[i]->max * YSCALE)); + + if (MAX(characteristics->nb_occurrence[i]->nb_value , 2) - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(characteristics->nb_occurrence[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[process == 0 ? 
STATL_STATE : STATL_OUTPUT] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + characteristics->nb_occurrence[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + + if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) { + + // sequence length frequency distribution + + plot.variable[index] = process; + plot.viewpoint[index] = COUNTING; + + plot[index].xrange = Range(0 , length_distribution->nb_value - 1); + plot[index].yrange = Range(0 , ceil(length_distribution->max * YSCALE)); + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(length_distribution->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + length_distribution->plotable_frequency_write(plot[index][0]); + index++; + } + } + + if (observation) { + if (empirical_observation) { + for (i = 0;i < nb_state;i++) { + + // plot observation distributions + + plot.variable[index] = process; + plot.viewpoint[index] = OBSERVATION; + + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , observation[i]->nb_value - 1); + if (observation[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + if (empirical_observation[i]->nb_element > 0) { + scale = empirical_observation[i]->nb_element; + plot[index].yrange = Range(0 , ceil(MAX(empirical_observation[i]->max , + observation[i]->max * scale) * YSCALE)); + + plot[index].resize(2); + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << 
STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + empirical_observation[i]->plotable_frequency_write(plot[index][0]); + j = 1; + } + + else { + scale = 1; + plot[index].yrange = Range(0 , MIN(observation[i]->max * YSCALE , 1.)); + + plot[index].resize(1); + j = 0; + } + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][j].legend = legend.str(); + + plot[index][j].style = "linespoints"; + + observation[i]->plotable_mass_write(plot[index][j] , scale); + index++; + } + } + + else { + + // observation distributions + + plot.variable[index] = process; + plot.viewpoint[index] = OBSERVATION; + + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , nb_value - 1); + if (nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + max = observation[0]->max; + for (i = 1;i < nb_state;i++) { + if (observation[i]->max > max) { + max = observation[i]->max; + } + } + plot[index].yrange = Range(0 , MIN(max * YSCALE , 1.)); + + plot[index].resize(nb_state); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " + << STAT_label[STATL_OBSERVATION] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + + observation[i]->plotable_mass_write(plot[index][i]); + } + + index++; + } + + if (marginal_distribution) { + if ((weight) && (mixture)) { + + // fit of the mixture of observation distributions (theoretical weights) + + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process << " - " + << STAT_label[STATL_THEORETICAL] << " " << STAT_label[STATL_WEIGHTS]; + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , nb_value - 1); + if (nb_value - 1 < 
TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].yrange = Range(0 , ceil(MAX(marginal_distribution->max , + mixture->max * marginal_distribution->nb_element) * YSCALE)); + + plot[index].resize(nb_state + 2); + + legend.str(""); + legend << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + marginal_distribution->plotable_frequency_write(plot[index][0]); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] << " " + << STAT_label[STATL_DISTRIBUTION]; + observation[i]->plot_title_print(legend); + plot[index][i + 1].legend = legend.str(); + + plot[index][i + 1].style = "linespoints"; + + observation[i]->plotable_mass_write(plot[index][i + 1] , + weight->mass[i] * marginal_distribution->nb_element); + } + + plot[index][nb_state + 1].legend = STAT_label[STATL_MIXTURE]; + + plot[index][nb_state + 1].style = "linespoints"; + + mixture->plotable_mass_write(plot[index][nb_state + 1] , + marginal_distribution->nb_element); + + index++; + } + + if ((restoration_weight) && (restoration_mixture)) { + + // fit of the mixture of observation distributions (restoration weights) + + title.str(""); + if (process > 0) { + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process; + } + title << STAT_label[STATL_RESTORATION] << " " << STAT_label[STATL_WEIGHTS]; + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , nb_value - 1); + if (nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].yrange = Range(0 , ceil(MAX(marginal_distribution->max , + restoration_mixture->max * marginal_distribution->nb_element) * YSCALE)); + + plot[index].resize(nb_state + 2); + + legend.str(""); + legend << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = 
"impulses"; + + marginal_distribution->plotable_frequency_write(plot[index][0]); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] << " " + << STAT_label[STATL_DISTRIBUTION]; + observation[i]->plot_title_print(legend); + plot[index][i + 1].legend = legend.str(); + + plot[index][i + 1].style = "linespoints"; + + observation[i]->plotable_mass_write(plot[index][i + 1] , + restoration_weight->mass[i] * marginal_distribution->nb_element); + } + + plot[index][nb_state + 1].legend = STAT_label[STATL_MIXTURE]; + + plot[index][nb_state + 1].style = "linespoints"; + + restoration_mixture->plotable_mass_write(plot[index][nb_state + 1] , + marginal_distribution->nb_element); + + index++; + } + } + } +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/change_points1.cpp b/src/cpp/sequence_analysis/change_points1.cpp new file mode 100644 index 0000000..df4eddd --- /dev/null +++ b/src/cpp/sequence_analysis/change_points1.cpp @@ -0,0 +1,6286 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: change_points1.cpp 18669 2015-11-09 12:08:08Z guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include + +#include +#include +#include + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Determination of the width of a column of reals. + * + * \param[in] nb_value number of values, + * \param[in] value pointer on real values. + * + * \return column width. + */ +/*--------------------------------------------------------------*/ + +int column_width(int nb_value , const long double *value) + +{ + int i; + int width , max_width = 0; + + + for (i = 0;i < nb_value;i++) { + ostringstream ostring; + ostring << value[i]; + width = (ostring.str()).size(); + if (width > max_width) { + max_width = width; + } + } + + return max_width; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log of the factorial of a value. + * + * \param[in] value value. + * + * \return log of the factorial of a value. 
+ */ +/*--------------------------------------------------------------*/ + +double log_factorial(int value) + +{ + int i; + double log_factorial; + + + log_factorial = 0.; + for (i = 2;i <= value;i++) { + log_factorial += log((double)i); + } + + return log_factorial; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log of a binomial coefficient for + * a negative binomial distribution. + * + * \param[in] inf_bound inf bound to the support, + * \param[in] parameter shape parameter, + * \param[in] value value. + * + * \return log of a binomial coefficient. + */ +/*--------------------------------------------------------------*/ + +double log_binomial_coefficient(int inf_bound , double parameter , int value) + +{ + int i; + double set , subset , log_coeff; + + + subset = parameter - 1.; + set = subset; + log_coeff = 0.; + + for (i = inf_bound;i < value;i++) { + set++; + log_coeff += log(set / (set - subset)); + } + +# ifdef MESSAGE + if (parameter == (int)parameter) { + double ilog_coeff = log(binomial_coefficient(value - inf_bound + parameter - 1 , parameter - 1)); + + if ((log_coeff < ilog_coeff - DOUBLE_ERROR) || (log_coeff > ilog_coeff + DOUBLE_ERROR)) { + cout << "TEST binomial coeff: " << log_coeff << " " << ilog_coeff << endl; + } + } +# endif + + return (log_coeff); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Empirical determination of the hyperparameters of a gamma prior + * distribution for a Poisson distribution. + * + * \param[in] index sequence index, + * \param[in] variable variable index, + * \param[in] hyperparam pointer on the hyperparameters. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::gamma_hyperparameter_computation(int index , int variable , + double *hyperparam) const + +{ + int i; + double mean , diff , variance; + + + if (length[index] > 1) { + mean = 0.; + for (i = 0;i < length[index];i++) { + mean += int_sequence[index][variable][i]; + } + mean /= length[index]; + + variance = 0.; + for (i = 0;i < length[i];i++) { + diff = int_sequence[index][variable][i] - mean; + variance += diff * diff; + } + variance /= (length[index] - 1); + + hyperparam[0] = mean * mean / (variance * PRIOR_VARIANCE_FACTOR); + hyperparam[1] = mean / (variance * PRIOR_VARIANCE_FACTOR); + +# ifdef MESSAGE + hyperparam[0] = 1.; + hyperparam[1] = 200. / 365.; +# endif + + } + + else { + hyperparam[0] = D_DEFAULT; + hyperparam[1] = D_DEFAULT; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Empirical determination of the hyperparameters of a Gaussian-gamma prior + * distribution for a Gaussian distribution. + * + * \param[in] index sequence index, + * \param[in] variable variable index, + * \param[in] hyperparam pointer on the hyperparameters. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::gaussian_gamma_hyperparameter_computation(int index , int variable , + double *hyperparam) const + +{ + int i; + int magnitude; + double mean , diff , dispersion , round_factor; + + + if (length[index] > 1) { + mean = 0.; + + if (type[variable] != REAL_VALUE) { + for (i = 0;i < length[index];i++) { + mean += int_sequence[index][variable][i]; + } + } + else { + for (i = 0;i < length[index];i++) { + mean += real_sequence[index][variable][i]; + } + } + mean /= length[index]; + + dispersion = 0.; + + if (type[variable] != REAL_VALUE) { + for (i = 1;i < length[index];i++) { + diff = int_sequence[index][variable][i] - int_sequence[index][variable][i - 1]; + dispersion += diff * diff; + } + } + else { + for (i = 1;i < length[index];i++) { + diff = real_sequence[index][variable][i] - real_sequence[index][variable][i - 1]; + dispersion += diff * diff; + } + } + dispersion /= (2 * (length[index] - 1)); + + hyperparam[0] = mean; + hyperparam[1] = PRIOR_SAMPLE_SIZE; + hyperparam[2] = PRIOR_DEGREES_OF_FREEDOM; + hyperparam[3] = dispersion / PRIOR_DISPERSION_FACTOR; + + magnitude = (int)(log10(hyperparam[0])) + 1; + if (magnitude > PRIOR_PRECISION) { + round_factor = pow(10.0 , magnitude - PRIOR_PRECISION); + +# ifdef DEBUG + cout << "\nTEST 0: " << magnitude << " " << round_factor << endl; +# endif + + hyperparam[0] = round_factor * ::round(hyperparam[0] / round_factor); + } + + magnitude = (int)(log10(hyperparam[3])) + 1; + if (magnitude > PRIOR_PRECISION) { + round_factor = pow(10.0 , magnitude - PRIOR_PRECISION); + +# ifdef DEBUG + cout << "\nTEST 3: " << magnitude << " " << round_factor << endl; +# endif + + hyperparam[3] = round_factor * ::round(hyperparam[3] / round_factor); + } + +// hyperparam[3] /= 10; + } + + else { + hyperparam[0] = D_DEFAULT; + hyperparam[1] = D_DEFAULT; + hyperparam[2] = D_DEFAULT; + hyperparam[3] = D_DEFAULT; + } +} + + 
+/*--------------------------------------------------------------*/ +/** + * \brief Computation of the number of free parameters. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals. + * + * \return number of free parameters. + */ +/*--------------------------------------------------------------*/ + +int Sequences::nb_parameter_computation(int index , int nb_segment , segment_model *model_type , + bool common_contrast) const + +{ + bool *used_output; + int i , j , k , m; + int nb_parameter , max_nb_value; + + +// nb_parameter = 0; + nb_parameter = nb_segment - 1; + + if (model_type[0] == MEAN_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += nb_segment + 1; + } + else { + nb_parameter += nb_sequence * nb_segment + 1; + } + } + + else if (model_type[0] == INTERCEPT_SLOPE_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += nb_segment * 2 + 1; + } + else { + nb_parameter += nb_sequence * nb_segment * 2 + 1; + } + } + + else { + max_nb_value = 0; + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == CATEGORICAL_CHANGE) && (marginal_distribution[i]->nb_value > max_nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + } + + if (max_nb_value > 0) { + used_output = new bool[max_nb_value]; + } + else { + used_output = NULL; + } + + for (i = 1;i < nb_variable;i++) { + if (model_type[i - 1] == CATEGORICAL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < length[j];k++) { + if ((k == 0) || ((k > 0) && (int_sequence[j][0][k] != int_sequence[j][0][k - 1]))) { + for (m = 0;m < marginal_distribution[i]->nb_value;m++) { + used_output[m] = false; + } + nb_parameter--; + } + + if (!used_output[int_sequence[j][i][k]]) { + 
nb_parameter++; + used_output[int_sequence[j][i][k]] = true; + } + } + } + } + } + + else { + for (j = 0;j < length[0];j++) { + if ((j == 0) || ((j > 0) && (int_sequence[0][0][j] != int_sequence[0][0][j - 1]))) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + used_output[k] = false; + } + nb_parameter--; + } + + for (k = 0;k < nb_sequence;k++) { + if (!used_output[int_sequence[k][i][j]]) { + nb_parameter++; + used_output[int_sequence[k][i][j]] = true; + } + } + + } + } + } + + else if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += nb_segment; + } + else { + nb_parameter += nb_sequence * nb_segment; + } + } + + else if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || + (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += 2 * nb_segment; + } + else { + nb_parameter += nb_sequence * 2 * nb_segment; + } + } + + else if (model_type[i - 1] == VARIANCE_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += nb_segment + 1; + } + else { + nb_parameter += nb_sequence * (nb_segment + 1); + } + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += 3 * nb_segment; + } + else { + nb_parameter += nb_sequence * 3 * nb_segment; + } + } + + else if (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter += 2 * nb_segment + 1; + } + else { + nb_parameter += nb_sequence * (2 * nb_segment + 1); + } + } + } + + delete [] used_output; + } + + return nb_parameter; +} + + 
+/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood in the case of a single segment. + * + * \param[in] index sequence index, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] rank ranks (for ordinal variables). + * + * \return log-likelihood of the single-segment model. + */ +/*--------------------------------------------------------------*/ + +double Sequences::one_segment_likelihood(int index , segment_model *model_type , bool common_contrast , + double *shape_parameter , double **rank) const + +{ + int i , j , k; + int max_nb_value , seq_length , count , *frequency , *inf_bound_parameter , *seq_index_parameter; + double sum , factorial_sum , binomial_coeff_sum , proba , mean , diff , index_parameter_mean , + index_parameter_diff , index_parameter_sum , shifted_diff , likelihood; + long double index_parameter_square_sum , square_sum , mix_square_sum , shifted_square_sum , + autocovariance , *residual; + + + max_nb_value = 0; + inf_bound_parameter = new int[nb_variable]; + residual = NULL; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == CATEGORICAL_CHANGE) && (marginal_distribution[i]->nb_value > max_nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + } + + if (((i == 1) && ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE))) || + (((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || 
(model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && (!residual))) { + residual = new long double[nb_sequence]; + } + } + + if (max_nb_value > 0) { + frequency = new int[max_nb_value]; + } + else { + frequency = NULL; + } + + seq_length = length[index == I_DEFAULT ? 0 : index]; + seq_index_parameter = NULL; + + for (i = 1;i < nb_variable;i++) { + if (((i == 1) && (model_type[0] == INTERCEPT_SLOPE_CHANGE)) || + ((model_type[i - 1] == LINEAR_MODEL_CHANGE) && (!seq_index_parameter))) { + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + likelihood = 0.; + } + + for (i = 1;i < nb_variable;i++) { + if (model_type[i - 1] == CATEGORICAL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + frequency[k] = 0; + } + + for (k = 0;k < length[j];k++) { + frequency[int_sequence[j][i][k]]++; + } + + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + if (frequency[k] > 0) { + likelihood += frequency[k] * log((double)frequency[k] / (double)length[j]); + } + } + } + } + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + frequency[j] = 0; + } + + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + frequency[int_sequence[k][i][j]]++; + } + } + + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (frequency[j] > 0) { + likelihood += frequency[j] * log((double)frequency[j] / (double)(nb_sequence * length[0])); + } + } + } + } + + else if (model_type[i - 1] == POISSON_CHANGE) { + 
if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = 0.; + factorial_sum = 0.; + for (k = 0;k < length[j];k++) { + sum += int_sequence[j][i][k]; + factorial_sum += log_factorial(int_sequence[j][i][k]); + } + + if (sum > 0.) { + likelihood += sum * (log(sum / length[j]) - 1) - factorial_sum; + } + } + } + } + + else { + sum = 0.; + factorial_sum = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + factorial_sum += log_factorial(int_sequence[k][i][j]); + } + } + + if (sum > 0.) { + likelihood += sum * (log(sum / (nb_sequence * length[0])) - 1) - factorial_sum; + } + } + } + + else if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = 0.; + binomial_coeff_sum = 0.; + for (k = 0;k < length[j];k++) { + sum += int_sequence[j][i][k]; + binomial_coeff_sum += log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + + if (sum > inf_bound_parameter[i - 1] * length[j]) { + proba = shape_parameter[i - 1] * length[j] / + ((shape_parameter[i - 1] - inf_bound_parameter[i - 1]) * length[j] + sum); + likelihood += binomial_coeff_sum + shape_parameter[i - 1] * length[j] * log(proba) + + (sum - inf_bound_parameter[i - 1] * length[j]) * log(1. 
- proba); + } + else { + likelihood = D_INF; + break; + } + } + } + } + + else { + sum = 0.; + binomial_coeff_sum = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + binomial_coeff_sum += log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[k][i][j]); + } + } + + if (sum > inf_bound_parameter[i - 1] * nb_sequence * length[0]) { + proba = shape_parameter[i - 1] * nb_sequence * length[0] / + ((shape_parameter[i - 1] - inf_bound_parameter[i - 1]) * nb_sequence * length[0] + sum); + likelihood += binomial_coeff_sum + shape_parameter[i - 1] * nb_sequence * length[0] * log(proba) + + (sum - inf_bound_parameter[i - 1] * nb_sequence * length[0]) * log(1. - proba); + } + else { + likelihood = D_INF; + break; + } + } + } + + else if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[0] == MEAN_CHANGE) || + (model_type[i - 1] == VARIANCE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +/* residual[j] = 0.; + sum = int_sequence[j][i][0]; + + for (k = 1;k < length[j];k++) { + diff = int_sequence[j][i][k] - sum / k; + residual[j] += ((double)k / (double)(k + 1)) * diff * diff; + sum += int_sequence[j][i][k]; + } */ + + mean = 0.; + for (k = 0;k < length[j];k++) { + mean += int_sequence[j][i][k]; + } + mean /= length[j]; + + residual[j] = 0.; + for (k = 0;k < length[j];k++) { + diff = int_sequence[j][i][k] - mean; + residual[j] += diff * diff; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +/* residual[j] = 0.; + sum = real_sequence[j][i][0]; + + for (k = 1;k < length[j];k++) { + diff = real_sequence[j][i][k] - sum / k; + residual[j] += ((double)k / (double)(k + 1)) * diff * diff; + sum += real_sequence[j][i][k]; + } */ + + mean = 0.; + for (k = 0;k < length[j];k++) { + mean += 
real_sequence[j][i][k]; + } + mean /= length[j]; + + residual[j] = 0.; + for (k = 0;k < length[j];k++) { + diff = real_sequence[j][i][k] - mean; + residual[j] += diff * diff; + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { +/* residual[0] = 0.; + sum = 0.; + count = 0; + + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = int_sequence[k][i][j] - sum / count; + residual[0] += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += int_sequence[k][i][j]; + } + } */ + + mean = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + mean += int_sequence[k][i][j]; + } + } + mean /= nb_sequence * length[0]; + + residual[0] = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j] - mean; + residual[0] += diff * diff; + } + } + } + + else { +/* residual[0] = 0.; + sum = 0.; + count = 0; + + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = real_sequence[k][i][j] - sum / count; + residual[0] += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += real_sequence[k][i][j]; + } + } */ + + mean = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + mean += real_sequence[k][i][j]; + } + } + mean /= nb_sequence * length[0]; + + residual[0] = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j] - mean; + residual[0] += diff * diff; + } + } + } + } + } + + else if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +/* residual[j] = 0.; + sum = rank[i][int_sequence[j][i][0]]; + + for (k = 1;k < length[j];k++) { + diff = rank[i][int_sequence[j][i][k]] - sum / k; + residual[j] += ((double)k / (double)(k + 1)) * diff * diff; + sum += rank[i][int_sequence[j][i][k]]; 
+ } */ + + mean = 0.; + for (k = 0;k < length[j];k++) { + mean += rank[i][int_sequence[j][i][k]]; + } + mean /= length[j]; + + residual[j] = 0.; + for (k = 0;k < length[j];k++) { + diff = rank[i][int_sequence[j][i][k]] - mean; + residual[j] += diff * diff; + } + } + } + } + + else { +/* residual[0] = 0.; + sum = 0.; + count = 0; + + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = rank[i][int_sequence[k][i][j]] - sum / count; + residual[0] += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += rank[i][int_sequence[k][i][j]]; + } + } */ + + mean = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + mean += rank[i][int_sequence[k][i][j]]; + } + } + mean /= nb_sequence * length[0]; + + residual[0] = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = rank[i][int_sequence[k][i][j]] - mean; + residual[0] += diff * diff; + } + } + } + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[0]; + sum = int_sequence[j][i][0]; + + for (k = 1;k < length[j];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / k; + index_parameter_square_sum += ((double)k / (double)(k + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[j][i][k] - sum / k; + square_sum += ((double)k / (double)(k + 1)) * diff * diff; + mix_square_sum += ((double)k / (double)(k + 1)) * index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[k]; + sum += int_sequence[j][i][k]; + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (k = 0;k < length[j];k++) { + index_parameter_mean += 
seq_index_parameter[k]; + mean += int_sequence[j][i][k]; + } + index_parameter_mean /= length[j]; + mean /= length[j]; + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (k = 0;k < length[j];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_mean; + diff = int_sequence[j][i][k] - mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + square_sum += diff * diff; + mix_square_sum += index_parameter_diff * diff; + } + + if (index_parameter_square_sum > 0.) { + residual[j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j] = 0.; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[0]; + sum = real_sequence[j][i][0]; + + for (k = 1;k < length[j];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / k; + index_parameter_square_sum += ((double)k / (double)(k + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[j][i][k] - sum / k; + square_sum += ((double)k / (double)(k + 1)) * diff * diff; + mix_square_sum += ((double)k / (double)(k + 1)) * index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[k]; + sum += real_sequence[j][i][k]; + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (k = 0;k < length[j];k++) { + index_parameter_mean += seq_index_parameter[k]; + mean += real_sequence[j][i][k]; + } + index_parameter_mean /= length[j]; + mean /= length[j]; + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (k = 0;k < length[j];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_mean; + diff = real_sequence[j][i][k] - mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + square_sum += diff * diff; + 
mix_square_sum += index_parameter_diff * diff; + } + + if (index_parameter_square_sum > 0.) { + residual[j] += square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j] = 0.; + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + count = 1; + + index_parameter_sum = nb_sequence * seq_index_parameter[0]; + sum = int_sequence[0][i][0]; + for (j = 1;j < nb_sequence;j++) { + diff = int_sequence[j][i][0] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += int_sequence[j][i][0]; + } + + for (j = 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[j]; + sum += int_sequence[k][i][j]; + } + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (j = 0;j < length[0];j++) { + index_parameter_mean += seq_index_parameter[j]; + for (k = 0;k < nb_sequence;k++) { + mean += int_sequence[k][i][j]; + } + } + index_parameter_mean /= length[0]; + mean /= nb_sequence * length[0]; + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (j = 0;j < length[0];j++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j] - mean; + square_sum += diff * diff; + mix_square_sum += index_parameter_diff * diff; + } + } + } + + else { +/* index_parameter_square_sum = 0.; + 
square_sum = 0.; + mix_square_sum = 0.; + count = 1; + + index_parameter_sum = nb_sequence * seq_index_parameter[0]; + sum = real_sequence[0][i][0]; + for (j = 1;j < nb_sequence;j++) { + diff = real_sequence[j][i][0] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += real_sequence[j][i][0]; + } + + for (j = 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[j]; + sum += real_sequence[k][i][j]; + } + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (j = 0;j < length[0];j++) { + index_parameter_mean += seq_index_parameter[j]; + for (k = 0;k < nb_sequence;k++) { + mean += real_sequence[k][i][j]; + } + } + index_parameter_mean /= length[0]; + mean /= nb_sequence * length[0]; + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (j = 0;j < length[0];j++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j] - mean; + square_sum += diff * diff; + mix_square_sum += index_parameter_diff * diff; + } + } + } + + index_parameter_square_sum *= nb_sequence; + if (index_parameter_square_sum > 0.) 
{ + residual[0] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0] = 0.; + } + } + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + mean = 0.; + for (k = 0;k < length[j];k++) { + mean += int_sequence[j][i][k]; + } + mean /= length[j]; + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (k = 1;k < length[j];k++) { + diff = int_sequence[j][i][k] - mean; + shifted_diff = int_sequence[j][i][k - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[j] = square_sum; + if (shifted_square_sum > 0.) { + residual[j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + mean = 0.; + for (k = 0;k < length[j];k++) { + mean += real_sequence[j][i][k]; + } + mean /= length[j]; + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (k = 1;k < length[j];k++) { + diff = real_sequence[j][i][k] - mean; + shifted_diff = real_sequence[j][i][k - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[j] = square_sum; + if (shifted_square_sum > 0.) { + residual[j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +// if (residual[j] > 0.) { + if (residual[j] > (length[j] - 1) * ROUNDOFF_ERROR) { + likelihood -= ((double)(length[j] - 1) / 2.) 
* (logl(residual[j] / (length[j] - 1)) + + log(2 * M_PI) + 1); + } + else { + likelihood = D_INF; + break; + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + mean = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + mean += int_sequence[k][i][j]; + } + } + mean /= nb_sequence * length[0]; + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (j = 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j] - mean; + shifted_diff = int_sequence[k][i][j - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + } + } + + else { + mean = 0.; + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + mean += real_sequence[k][i][j]; + } + } + mean /= nb_sequence * length[0]; + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (j = 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j] - mean; + shifted_diff = real_sequence[k][i][j - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + } + } + + residual[0] = square_sum; + if (shifted_square_sum > 0.) { + residual[0] -= autocovariance * autocovariance / shifted_square_sum; + } + +// if (residual[0] > 0.) { + if (residual[0] > nb_sequence * (length[0] - 1) * ROUNDOFF_ERROR) { + likelihood -= ((double)(nb_sequence * (length[0] - 1)) / 2.) 
* + (logl(residual[0] / (nb_sequence * (length[0] - 1))) + log(2 * M_PI) + 1); + } + else { + likelihood = D_INF; + break; + } + } + } + + if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { +// if (residual[j] > 0.) { + if (residual[j] > length[j] * ROUNDOFF_ERROR) { + likelihood -= ((double)length[j] / 2.) * (logl(residual[j] / length[j]) + + log(2 * M_PI) + 1); +/* likelihood -= ((double)length[j] / 2.) * (logl(residual[j] / (length[j] - 1)) + + log(2 * M_PI)) - (double)(length[j] - 1) / 2.; */ + } + else { + likelihood = D_INF; + break; + } + } + } + } + + else { +// if (residual[0] > 0.) { + if (residual[0] > nb_sequence * length[0] * ROUNDOFF_ERROR) { + likelihood -= ((double)(nb_sequence * length[0]) / 2.) * + (logl(residual[0] / (nb_sequence * length[0])) + log(2 * M_PI) + 1); +/* likelihood -= ((double)(nb_sequence * length[0]) / 2.) * + (logl(residual[0] / (nb_sequence * length[0] - 1)) + + log(2 * M_PI)) - (double)(nb_sequence * length[0] - 1) / 2.; */ + } + else { + likelihood = D_INF; + break; + } + } + } + + if (likelihood == D_INF) { + break; + } + } + + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if (index != I_DEFAULT) { +// if (residual[index] > 0.) { + if (residual[index] > length[index] * ROUNDOFF_ERROR) { + likelihood -= ((double)length[index] / 2.) * (logl(residual[index] / length[index]) + + log(2 * M_PI) + 1); +/* likelihood -= ((double)length[index] / 2.) * (logl(residual[index] / (length[index] - 1)) + + log(2 * M_PI)) - (double)(length[index] - 1) / 2.; */ + } + else { + likelihood = D_INF; + } + } + + else { + if (!common_contrast) { + for (i = 1;i < nb_sequence;i++) { + residual[0] += residual[i]; + } + } + +// if (residual[0] > 0.) 
{ + if (residual[0] > nb_sequence * length[0] * ROUNDOFF_ERROR) { + likelihood = -((double)(nb_sequence * length[0]) / 2.) * + (logl(residual[0] / (nb_sequence * length[0])) + log(2 * M_PI) + 1); +/* likelihood = -((double)(nb_sequence * length[0]) / 2.) * + (logl(residual[0] / (nb_sequence * (length[0] - 1))) + + log(2 * M_PI)) - (double)(nb_sequence * (length[0] - 1)) / 2.; */ + } + else { + likelihood = D_INF; + } + } + } + + for (i = 0;i < nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < length[i];j++) { + int_sequence[i][0][j] = 0; + } + } + } + + delete [] frequency; + delete [] residual; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + return likelihood; +} + +
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of piecewise linear functions.
+ *
+ *  For each segment delimited by change_point, the parameters of the segment model
+ *  selected by model_type are estimated: a segment mean (Poisson, negative binomial,
+ *  Gaussian, mean, variance and Bayesian models), an intercept and a slope regressed on
+ *  seq_index_parameter (linear models), or a mean and a first-order autoregressive
+ *  coefficient clamped to [-1, 1] (autoregressive models).
+ *  The estimation is done per sequence when index designates a sequence or when
+ *  common_contrast is false (results stored in row i of the output arrays), and on the
+ *  pooled sequences otherwise (results stored in row 0; length[0] is then used as the
+ *  common sequence length).
+ *  Every output array may be null, in which case the corresponding result is not stored.
+ *
+ *  \param[in]  index                     sequence index (I_DEFAULT: all the sequences),
+ *  \param[in]  variable                  variable index,
+ *  \param[in]  nb_segment                number of segments,
+ *  \param[in]  model_type                segment model type,
+ *  \param[in]  common_contrast           flag contrast functions common to the individuals,
+ *  \param[in]  change_point              change points (nb_segment + 1 bounds, segment j covering
+ *                                        [change_point[j], change_point[j + 1])),
+ *  \param[in]  seq_index_parameter       index parameters (explanatory variable of the linear models),
+ *  \param[out] piecewise_function        piecewise linear functions (fitted value at each position),
+ *  \param[out] imean                     segment means,
+ *  \param[out] variance                  segment variances or residual variances; with common contrasts,
+ *                                        rows 1, 2 and 3 receive the sum of squares about the segment mean,
+ *                                        the between-individual part and the within-individual part,
+ *                                        each divided by the number of terms summed (computed on ranks
+ *                                        for the count models),
+ *  \param[out] global_variance           global variance or residual variance (entry variable is written),
+ *  \param[out] iintercept                segment intercepts (written only if islope is also given),
+ *  \param[out] islope                    segment slopes (written only if iintercept is also given),
+ *  \param[out] iautoregressive_coeff     segment autoregressive coefficients,
+ *  \param[out] correlation               segment correlation coefficients (for linear models),
+ *  \param[out] slope_standard_deviation  segment slope standard deviations (for linear models),
+ *  \param[out] iindex_parameter_mean     segment index parameter mean (for linear models),
+ *  \param[out] iindex_parameter_variance segment index parameter sum of squared deviations (for linear models),
+ *  \param[out] determination_coeff       coefficient of determination (for autoregressive models).
+ *
+ *  \return     log-likelihood of the piecewise linear function, or D_INF when it is not
+ *              computed for model_type, when the required output arrays are not supplied,
+ *              or when a residual sum of squares is not above the round-off threshold.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::piecewise_linear_function(int index , int variable , int nb_segment , segment_model model_type ,
+                                            bool common_contrast , int *change_point , int *seq_index_parameter ,
+                                            double **piecewise_function , double **imean , double **variance ,
+                                            double *global_variance , double **iintercept , double **islope ,
+                                            double **iautoregressive_coeff , double **correlation ,
+                                            double **slope_standard_deviation , double **iindex_parameter_mean ,
+                                            long double **iindex_parameter_variance , double **determination_coeff) const
+
+{
+  int i , j , k , segment_length , nb_value;
+  double likelihood , mean , diff , diff_sum , index_parameter_mean , response_mean , shifted_diff ,
+         slope , intercept , autoregressive_coeff , *individual_mean , *rank;
+  long double square_sum , global_square_sum , index_parameter_variance , response_variance , covariance ,
+              shifted_square_sum , autocovariance , residual_square_sum , mean_squared_error_1;
+
+# ifdef MESSAGE
+  long double mean_squared_error = 0.;
+# endif
+
+  // uniform read access to the observations: the variable is stored in int_sequence or
+  // in real_sequence depending on its type and every statistic is accumulated in floating point
+
+  struct ObservationReader {
+    const Sequences *seq;
+    int var;
+    bool real_valued;
+
+    double operator()(int iseq , int pos) const
+    {
+      return (real_valued ? seq->real_sequence[iseq][var][pos] : (double)seq->int_sequence[iseq][var][pos]);
+    }
+  };
+  const ObservationReader value = {this , variable , (type[variable] == REAL_VALUE)};
+
+  // defaults shared by all the model families: the function returns D_INF unless a branch
+  // below computes a likelihood (this includes model types handled by none of the branches),
+  // and the accumulators are always defined before being incremented
+
+  likelihood = D_INF;
+  global_square_sum = 0.;
+  mean_squared_error_1 = 0.;
+
+
+  if ((model_type == POISSON_CHANGE) || (model_type == NEGATIVE_BINOMIAL_0_CHANGE) ||
+      (model_type == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type == GAUSSIAN_CHANGE) ||
+      (model_type == MEAN_CHANGE) || (model_type == VARIANCE_CHANGE) ||
+      (model_type == BAYESIAN_POISSON_CHANGE) || (model_type == BAYESIAN_GAUSSIAN_CHANGE)) {
+    if (((model_type == GAUSSIAN_CHANGE) || (model_type == VARIANCE_CHANGE)) && ((variance) || (global_variance))) {
+      likelihood = 0.;
+    }
+
+    if ((index != I_DEFAULT) || (!common_contrast)) {
+      for (i = 0;i < nb_sequence;i++) {
+        if ((index == I_DEFAULT) || (index == i)) {
+
+          // variance change: a single mean for the whole sequence
+
+          if (model_type == VARIANCE_CHANGE) {
+            mean = 0.;
+            for (j = 0;j < length[i];j++) {
+              mean += value(i , j);
+            }
+            mean /= length[i];
+          }
+
+          for (j = 0;j < nb_segment;j++) {
+            segment_length = change_point[j + 1] - change_point[j];
+
+            if (model_type != VARIANCE_CHANGE) {
+              mean = 0.;
+              for (k = change_point[j];k < change_point[j + 1];k++) {
+                mean += value(i , k);
+              }
+              mean /= segment_length;
+            }
+
+            if (imean) {
+              imean[i][j] = mean;
+            }
+            if (piecewise_function) {
+              for (k = change_point[j];k < change_point[j + 1];k++) {
+                piecewise_function[i][k] = mean;
+              }
+            }
+
+            // segments of a single observation contribute neither a variance nor a likelihood term
+
+            if (((variance) || (global_variance)) && (segment_length > 1)) {
+              square_sum = 0.;
+              for (k = change_point[j];k < change_point[j + 1];k++) {
+                diff = value(i , k) - mean;
+                square_sum += diff * diff;
+              }
+
+              if (global_variance) {
+                global_square_sum += square_sum;
+              }
+              if (variance) {
+                variance[i][j] = square_sum / (segment_length - 1);
+              }
+
+              if (((model_type == GAUSSIAN_CHANGE) || (model_type == VARIANCE_CHANGE)) && (likelihood != D_INF)) {
+                if (square_sum > segment_length * ROUNDOFF_ERROR) {
+                  likelihood -= ((double)segment_length / 2.) * (log(square_sum / segment_length) +
+                                 log(2 * M_PI) + 1);
+                }
+                else {
+                  likelihood = D_INF;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    else {
+      individual_mean = new double[nb_sequence];
+
+      // rank variance decomposition
+
+      if (((model_type == POISSON_CHANGE) || (model_type == NEGATIVE_BINOMIAL_0_CHANGE) ||
+           (model_type == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type == BAYESIAN_POISSON_CHANGE)) && (variance)) {
+        rank = marginal_distribution[variable]->rank_computation();
+
+        for (i = 0;i < nb_segment;i++) {
+          segment_length = change_point[i + 1] - change_point[i];
+
+          mean = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            individual_mean[j] = 0.;
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              individual_mean[j] += rank[int_sequence[j][variable][k]];
+            }
+            mean += individual_mean[j];
+            individual_mean[j] /= segment_length;
+          }
+          mean /= (nb_sequence * segment_length);
+
+          square_sum = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              diff = rank[int_sequence[j][variable][k]] - mean;
+              square_sum += diff * diff;
+            }
+          }
+          variance[1][i] = square_sum / (nb_sequence * segment_length);
+
+          square_sum = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            diff = individual_mean[j] - mean;
+            square_sum += diff * diff;
+          }
+          variance[2][i] = square_sum / nb_sequence;
+
+          square_sum = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              diff = rank[int_sequence[j][variable][k]] - individual_mean[j];
+              square_sum += diff * diff;
+            }
+          }
+          variance[3][i] = square_sum / (nb_sequence * segment_length);
+        }
+
+        delete [] rank;
+      }
+
+      // variance change: a single mean for the pooled sequences
+
+      if (model_type == VARIANCE_CHANGE) {
+        mean = 0.;
+        for (i = 0;i < nb_sequence;i++) {
+          individual_mean[i] = 0.;
+          for (j = 0;j < length[0];j++) {
+            individual_mean[i] += value(i , j);
+          }
+          mean += individual_mean[i];
+          individual_mean[i] /= length[0];
+        }
+        mean /= (nb_sequence * length[0]);
+      }
+
+      for (i = 0;i < nb_segment;i++) {
+        segment_length = change_point[i + 1] - change_point[i];
+
+        if (model_type != VARIANCE_CHANGE) {
+          mean = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            individual_mean[j] = 0.;
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              individual_mean[j] += value(j , k);
+            }
+            mean += individual_mean[j];
+            individual_mean[j] /= segment_length;
+          }
+          mean /= (nb_sequence * segment_length);
+        }
+
+        if (imean) {
+          imean[0][i] = mean;
+        }
+        if (piecewise_function) {
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              piecewise_function[j][k] = mean;
+            }
+          }
+        }
+
+        if ((variance) || (global_variance)) {
+          square_sum = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              diff = value(j , k) - mean;
+              square_sum += diff * diff;
+            }
+          }
+
+          if (global_variance) {
+            global_square_sum += square_sum;
+          }
+          if (variance) {
+            variance[0][i] = square_sum / (nb_sequence * segment_length - 1);
+          }
+
+          if (((model_type == GAUSSIAN_CHANGE) || (model_type == VARIANCE_CHANGE)) && (likelihood != D_INF)) {
+            if (square_sum > nb_sequence * segment_length * ROUNDOFF_ERROR) {
+              likelihood -= ((double)(nb_sequence * segment_length) / 2.) * (log(square_sum /
+                             (nb_sequence * segment_length)) + log(2 * M_PI) + 1);
+            }
+            else {
+              likelihood = D_INF;
+            }
+          }
+
+          // variance decomposition (only stored when the variance array is supplied)
+
+          if ((variance) && ((model_type == GAUSSIAN_CHANGE) || (model_type == MEAN_CHANGE) ||
+               (model_type == BAYESIAN_GAUSSIAN_CHANGE))) {
+            variance[1][i] = square_sum / (nb_sequence * segment_length);
+
+            square_sum = 0.;
+            for (j = 0;j < nb_sequence;j++) {
+              diff = individual_mean[j] - mean;
+              square_sum += diff * diff;
+            }
+            variance[2][i] = square_sum / nb_sequence;
+
+            square_sum = 0.;
+            for (j = 0;j < nb_sequence;j++) {
+              for (k = change_point[i];k < change_point[i + 1];k++) {
+                diff = value(j , k) - individual_mean[j];
+                square_sum += diff * diff;
+              }
+            }
+            variance[3][i] = square_sum / (nb_sequence * segment_length);
+          }
+        }
+      }
+
+      delete [] individual_mean;
+    }
+
+    if (global_variance) {
+      nb_value = (index != I_DEFAULT ? length[index] : nb_sequence * length[0]);
+
+      if (model_type == MEAN_CHANGE) {
+        global_variance[variable] = global_square_sum / (nb_value - nb_segment);
+
+        if (global_square_sum > nb_value * ROUNDOFF_ERROR) {
+          likelihood = -((double)nb_value / 2.) * (log(global_square_sum / nb_value) + log(2 * M_PI) + 1);
+        }
+        else {
+          likelihood = D_INF;
+        }
+      }
+
+      // computation of mean squared error
+
+      else {
+        global_variance[variable] = global_square_sum / nb_value;
+      }
+    }
+  }
+
+  else if ((model_type == LINEAR_MODEL_CHANGE) || (model_type == INTERCEPT_SLOPE_CHANGE)) {
+    if ((model_type == LINEAR_MODEL_CHANGE) && ((variance) || (global_variance))) {
+      likelihood = 0.;
+    }
+
+    if ((index != I_DEFAULT) || (!common_contrast)) {
+      for (i = 0;i < nb_sequence;i++) {
+        if ((index == I_DEFAULT) || (index == i)) {
+          for (j = 0;j < nb_segment;j++) {
+            segment_length = change_point[j + 1] - change_point[j];
+
+            index_parameter_mean = 0.;
+            for (k = change_point[j];k < change_point[j + 1];k++) {
+              index_parameter_mean += seq_index_parameter[k];
+            }
+            index_parameter_mean /= segment_length;
+            if (iindex_parameter_mean) {
+              iindex_parameter_mean[i][j] = index_parameter_mean;
+            }
+
+            index_parameter_variance = 0.;
+            for (k = change_point[j];k < change_point[j + 1];k++) {
+              diff = seq_index_parameter[k] - index_parameter_mean;
+              index_parameter_variance += diff * diff;
+            }
+            if (iindex_parameter_variance) {
+              iindex_parameter_variance[i][j] = index_parameter_variance;
+            }
+
+            response_mean = 0.;
+            response_variance = 0.;
+            covariance = 0.;
+
+            for (k = change_point[j];k < change_point[j + 1];k++) {
+              response_mean += value(i , k);
+            }
+            response_mean /= segment_length;
+
+            for (k = change_point[j];k < change_point[j + 1];k++) {
+              diff = value(i , k) - response_mean;
+              response_variance += diff * diff;
+              covariance += (seq_index_parameter[k] - index_parameter_mean) * diff;
+            }
+
+            // least-squares estimates
+
+            slope = covariance / index_parameter_variance;
+            intercept = response_mean - slope * index_parameter_mean;
+
+            if ((islope) && (iintercept)) {
+              iintercept[i][j] = intercept;
+              islope[i][j] = slope;
+            }
+            if (correlation) {
+              correlation[i][j] = covariance / sqrt(response_variance * index_parameter_variance);
+            }
+
+            if (piecewise_function) {
+              for (k = change_point[j];k < change_point[j + 1];k++) {
+                piecewise_function[i][k] = intercept + slope * seq_index_parameter[k];
+              }
+            }
+
+            if ((variance) || (global_variance)) {
+              if (segment_length > 2) {
+                square_sum = 0.;
+                for (k = change_point[j];k < change_point[j + 1];k++) {
+                  diff = value(i , k) - (intercept + slope * seq_index_parameter[k]);
+                  square_sum += diff * diff;
+                }
+
+                if ((global_variance) || (model_type == INTERCEPT_SLOPE_CHANGE)) {
+                  global_square_sum += square_sum;
+                }
+                if (variance) {
+                  variance[i][j] = square_sum / (segment_length - 2);
+                }
+
+                if ((model_type == LINEAR_MODEL_CHANGE) && (likelihood != D_INF)) {
+                  if (square_sum > segment_length * ROUNDOFF_ERROR) {
+                    likelihood -= ((double)segment_length / 2.) * (log(square_sum / segment_length) +
+                                   log(2 * M_PI) + 1);
+                  }
+                  else {
+                    likelihood = D_INF;
+                  }
+                }
+
+                if (slope_standard_deviation) {
+                  square_sum /= (segment_length - 2);
+                  slope_standard_deviation[i][j] = sqrt(square_sum / index_parameter_variance);
+                }
+              }
+
+              else if (variance) {
+                variance[i][j] = 0.;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    else {
+      for (i = 0;i < nb_segment;i++) {
+        segment_length = change_point[i + 1] - change_point[i];
+
+        index_parameter_mean = 0.;
+        for (j = change_point[i];j < change_point[i + 1];j++) {
+          index_parameter_mean += seq_index_parameter[j];
+        }
+        index_parameter_mean /= segment_length;
+        if (iindex_parameter_mean) {
+          iindex_parameter_mean[0][i] = index_parameter_mean;
+        }
+
+        index_parameter_variance = 0.;
+        for (j = change_point[i];j < change_point[i + 1];j++) {
+          diff = seq_index_parameter[j] - index_parameter_mean;
+          index_parameter_variance += diff * diff;
+        }
+        index_parameter_variance *= nb_sequence;
+        if (iindex_parameter_variance) {
+          iindex_parameter_variance[0][i] = index_parameter_variance;
+        }
+
+        response_mean = 0.;
+        response_variance = 0.;
+        covariance = 0.;
+
+        for (j = change_point[i];j < change_point[i + 1];j++) {
+          for (k = 0;k < nb_sequence;k++) {
+            response_mean += value(k , j);
+          }
+        }
+        response_mean /= (nb_sequence * segment_length);
+
+        for (j = change_point[i];j < change_point[i + 1];j++) {
+          diff_sum = 0.;
+          for (k = 0;k < nb_sequence;k++) {
+            diff = value(k , j) - response_mean;
+            response_variance += diff * diff;
+            diff_sum += diff;
+          }
+          covariance += (seq_index_parameter[j] - index_parameter_mean) * diff_sum;
+        }
+
+        // least-squares estimates
+
+        slope = covariance / index_parameter_variance;
+        intercept = response_mean - slope * index_parameter_mean;
+
+        if ((islope) && (iintercept)) {
+          iintercept[0][i] = intercept;
+          islope[0][i] = slope;
+        }
+        if (correlation) {
+          correlation[0][i] = covariance / sqrt(response_variance * index_parameter_variance);
+        }
+
+        if (piecewise_function) {
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              piecewise_function[j][k] = intercept + slope * seq_index_parameter[k];
+            }
+          }
+        }
+
+        if ((variance) || (global_variance)) {
+          if (nb_sequence * segment_length > 2) {
+            square_sum = 0.;
+            for (j = change_point[i];j < change_point[i + 1];j++) {
+              for (k = 0;k < nb_sequence;k++) {
+                diff = value(k , j) - (intercept + slope * seq_index_parameter[j]);
+                square_sum += diff * diff;
+              }
+            }
+
+            if ((global_variance) || (model_type == INTERCEPT_SLOPE_CHANGE)) {
+              global_square_sum += square_sum;
+            }
+            if (variance) {
+              variance[0][i] = square_sum / (nb_sequence * segment_length - 2);
+            }
+
+            if ((model_type == LINEAR_MODEL_CHANGE) && (likelihood != D_INF)) {
+              if (square_sum > nb_sequence * segment_length * ROUNDOFF_ERROR) {
+                likelihood -= ((double)(nb_sequence * segment_length) / 2.) * (log(square_sum /
+                               (nb_sequence * segment_length)) + log(2 * M_PI) + 1);
+              }
+              else {
+                likelihood = D_INF;
+              }
+            }
+
+            if (slope_standard_deviation) {
+              square_sum /= (nb_sequence * segment_length - 2);
+              slope_standard_deviation[0][i] = sqrt(square_sum / index_parameter_variance);
+            }
+          }
+
+          else if (variance) {
+            variance[0][i] = 0.;
+          }
+        }
+      }
+    }
+
+    if (global_variance) {
+      nb_value = (index != I_DEFAULT ? length[index] : nb_sequence * length[0]);
+
+      if (model_type == INTERCEPT_SLOPE_CHANGE) {
+        global_variance[variable] = global_square_sum / (nb_value - 2 * nb_segment);
+
+        if (global_square_sum > nb_value * ROUNDOFF_ERROR) {
+          likelihood = -((double)nb_value / 2.) * (log(global_square_sum / nb_value) + log(2 * M_PI) + 1);
+        }
+        else {
+          likelihood = D_INF;
+        }
+      }
+
+      // computation of mean squared error
+
+      else {
+        global_variance[variable] = global_square_sum / nb_value;
+      }
+    }
+  }
+
+  else if ((model_type == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) {
+    if (variance) {
+      likelihood = 0.;
+    }
+
+    if ((index != I_DEFAULT) || (!common_contrast)) {
+      for (i = 0;i < nb_sequence;i++) {
+        if ((index == I_DEFAULT) || (index == i)) {
+
+          // stationary model: a single mean for the whole sequence
+
+          if (model_type == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) {
+            mean = 0.;
+            for (j = 0;j < length[i];j++) {
+              mean += value(i , j);
+            }
+            mean /= length[i];
+          }
+
+          for (j = 0;j < nb_segment;j++) {
+            segment_length = change_point[j + 1] - change_point[j];
+
+            if (model_type == AUTOREGRESSIVE_MODEL_CHANGE) {
+              mean = 0.;
+              for (k = change_point[j];k < change_point[j + 1];k++) {
+                mean += value(i , k);
+              }
+              mean /= segment_length;
+            }
+
+            if (imean) {
+              imean[i][j] = mean;
+            }
+
+            // sums over the pairs of successive observations of the segment
+
+            square_sum = 0.;
+            shifted_square_sum = 0.;
+            autocovariance = 0.;
+            for (k = change_point[j] + 1;k < change_point[j + 1];k++) {
+              diff = value(i , k) - mean;
+              shifted_diff = value(i , k - 1) - mean;
+              square_sum += diff * diff;
+              shifted_square_sum += shifted_diff * shifted_diff;
+              autocovariance += diff * shifted_diff;
+            }
+
+            if (shifted_square_sum > 0.) {
+              autoregressive_coeff = autocovariance / shifted_square_sum;
+              if (autoregressive_coeff < -1.) {
+                autoregressive_coeff = -1.;
+              }
+              else if (autoregressive_coeff > 1.) {
+                autoregressive_coeff = 1.;
+              }
+
+              if (iautoregressive_coeff) {
+                iautoregressive_coeff[i][j] = autoregressive_coeff;
+              }
+
+              if (piecewise_function) {
+                piecewise_function[i][change_point[j]] = mean;
+                for (k = change_point[j] + 1;k < change_point[j + 1];k++) {
+                  piecewise_function[i][k] = mean + autoregressive_coeff * (value(i , k - 1) - mean);
+                }
+              }
+
+              if (global_variance) {
+
+                // the first observation of the segment has no predecessor: predicted by the mean
+
+                diff = value(i , change_point[j]) - mean;
+                mean_squared_error_1 += diff * diff;
+
+#               ifdef MESSAGE
+                mean_squared_error += diff * diff;
+                for (k = change_point[j] + 1;k < change_point[j + 1];k++) {
+                  diff = value(i , k) - (mean + autoregressive_coeff * (value(i , k - 1) - mean));
+                  mean_squared_error += diff * diff;
+                }
+#               endif
+
+              }
+
+              if ((variance) || (global_variance)) {
+                residual_square_sum = square_sum - autocovariance * autocovariance / shifted_square_sum;
+                if (global_variance) {
+                  global_square_sum += residual_square_sum;
+                }
+                if (variance) {
+                  variance[i][j] = residual_square_sum / (segment_length - 2);
+                }
+
+                if (determination_coeff) {
+                  determination_coeff[i][j] = 1.;
+                  if (square_sum > 0.) {
+                    determination_coeff[i][j] -= residual_square_sum / square_sum;
+                  }
+                }
+
+                if (likelihood != D_INF) {
+                  if (residual_square_sum > (segment_length - 1) * ROUNDOFF_ERROR) {
+                    likelihood -= ((double)(segment_length - 1) / 2.) * (log(residual_square_sum /
+                                   (segment_length - 1)) + log(2 * M_PI) + 1);
+                  }
+                  else {
+                    likelihood = D_INF;
+                  }
+                }
+              }
+            }
+
+            // degenerate segment: no autoregressive coefficient can be estimated
+
+            else {
+              if (iautoregressive_coeff) {
+                iautoregressive_coeff[i][j] = 0.;
+              }
+
+              if (piecewise_function) {
+                for (k = change_point[j];k < change_point[j + 1];k++) {
+                  piecewise_function[i][k] = mean;
+                }
+              }
+
+              if (variance) {
+                variance[i][j] = D_DEFAULT;
+                if (determination_coeff) {
+                  determination_coeff[i][j] = 0.;
+                }
+                likelihood = D_INF;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    else {
+
+      // stationary model: a single mean for the pooled sequences
+
+      if (model_type == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) {
+        mean = 0.;
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 0;j < length[0];j++) {
+            mean += value(i , j);
+          }
+        }
+        mean /= (nb_sequence * length[0]);
+      }
+
+      for (i = 0;i < nb_segment;i++) {
+        segment_length = change_point[i + 1] - change_point[i];
+
+        if (model_type == AUTOREGRESSIVE_MODEL_CHANGE) {
+          mean = 0.;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = change_point[i];k < change_point[i + 1];k++) {
+              mean += value(j , k);
+            }
+          }
+          mean /= (nb_sequence * segment_length);
+        }
+
+        if (imean) {
+          imean[0][i] = mean;
+        }
+
+        // sums over the pairs of successive observations of the segment
+
+        square_sum = 0.;
+        shifted_square_sum = 0.;
+        autocovariance = 0.;
+        for (j = 0;j < nb_sequence;j++) {
+          for (k = change_point[i] + 1;k < change_point[i + 1];k++) {
+            diff = value(j , k) - mean;
+            shifted_diff = value(j , k - 1) - mean;
+            square_sum += diff * diff;
+            shifted_square_sum += shifted_diff * shifted_diff;
+            autocovariance += diff * shifted_diff;
+          }
+        }
+
+        if (shifted_square_sum > 0.) {
+          autoregressive_coeff = autocovariance / shifted_square_sum;
+          if (autoregressive_coeff < -1.) {
+            autoregressive_coeff = -1.;
+          }
+          else if (autoregressive_coeff > 1.) {
+            autoregressive_coeff = 1.;
+          }
+
+          if (iautoregressive_coeff) {
+            iautoregressive_coeff[0][i] = autoregressive_coeff;
+          }
+
+          if (piecewise_function) {
+            for (j = 0;j < nb_sequence;j++) {
+              piecewise_function[j][change_point[i]] = mean;
+              for (k = change_point[i] + 1;k < change_point[i + 1];k++) {
+                piecewise_function[j][k] = mean + autoregressive_coeff * (value(j , k - 1) - mean);
+              }
+            }
+          }
+
+          if (global_variance) {
+            for (j = 0;j < nb_sequence;j++) {
+
+              // the first observation of the segment has no predecessor: predicted by the mean
+
+              diff = value(j , change_point[i]) - mean;
+              mean_squared_error_1 += diff * diff;
+
+#             ifdef MESSAGE
+              mean_squared_error += diff * diff;
+              for (k = change_point[i] + 1;k < change_point[i + 1];k++) {
+                diff = value(j , k) - (mean + autoregressive_coeff * (value(j , k - 1) - mean));
+                mean_squared_error += diff * diff;
+              }
+#             endif
+
+            }
+          }
+
+          if ((variance) || (global_variance)) {
+            residual_square_sum = square_sum - autocovariance * autocovariance / shifted_square_sum;
+            if (global_variance) {
+              global_square_sum += residual_square_sum;
+            }
+            if (variance) {
+              variance[0][i] = residual_square_sum / (nb_sequence * (segment_length - 1) - 1);
+            }
+
+            if (determination_coeff) {
+              determination_coeff[0][i] = 1.;
+              if (square_sum > 0.) {
+                determination_coeff[0][i] -= residual_square_sum / square_sum;
+              }
+            }
+
+            if (likelihood != D_INF) {
+              if (residual_square_sum > nb_sequence * (segment_length - 1) * ROUNDOFF_ERROR) {
+                likelihood -= ((double)(nb_sequence * (segment_length - 1)) / 2.) * (log(residual_square_sum /
+                               (nb_sequence * (segment_length - 1))) + log(2 * M_PI) + 1);
+              }
+              else {
+                likelihood = D_INF;
+              }
+            }
+          }
+        }
+
+        // degenerate segment: no autoregressive coefficient can be estimated
+
+        else {
+          if (iautoregressive_coeff) {
+            iautoregressive_coeff[0][i] = 0.;
+          }
+
+          if (piecewise_function) {
+            for (j = 0;j < nb_sequence;j++) {
+              for (k = change_point[i];k < change_point[i + 1];k++) {
+                piecewise_function[j][k] = mean;
+              }
+            }
+          }
+
+          if (variance) {
+            variance[0][i] = D_DEFAULT;
+            if (determination_coeff) {
+              determination_coeff[0][i] = 0.;
+            }
+            likelihood = D_INF;
+          }
+        }
+      }
+    }
+
+    // computation of mean squared error
+
+    if (global_variance) {
+      nb_value = (index != I_DEFAULT ? length[index] : nb_sequence * length[0]);
+
+      global_variance[variable] = (mean_squared_error_1 + global_square_sum) / nb_value;
+
+#     ifdef MESSAGE
+      mean_squared_error /= nb_value;
+
+      if ((global_variance[variable] < mean_squared_error - DOUBLE_ERROR) ||
+          (global_variance[variable] > mean_squared_error + DOUBLE_ERROR)) {
+        cout << "\nERROR " << SEQ_label[SEQL_ROOT_MEAN_SQUARE_ERROR] << ": " << sqrt(global_variance[variable]) << " | "
+             << sqrtl(mean_squared_error) << endl;
+      }
+#     endif
+
+    }
+  }
+
+  return likelihood;
+}
+
+
+/*--------------------------------------------------------------*/ +/** + * \brief Writing of piecewise linear functions. + * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] variable variable index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model type, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] change_point change points, + * \param[in] seq_index_parameter index parameters, + * \param[in] mean segment means, + * \param[in] variance segment variances or residual variances, + * \param[in] intercept segment intercepts, + * \param[in] slope segment slopes, + * \param[in] autoregressive_coeff segment autoregressive coefficient, + * \param[in] correlation segment correlation coefficients (for linear models), + * \param[in] slope_standard_deviation segment slope standard deviations (for linear models). + * \param[in] index_parameter_mean segment index parameter mean (for linear models), + * \param[in] index_parameter_variance segment index parameter variance (for linear models), + * \param[in] determination_coeff coefficient of determination (for autoregressive models). 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::piecewise_linear_function_ascii_print(ostream &os , int index , int variable , int nb_segment , + segment_model model_type , bool common_contrast , int *change_point , + int *seq_index_parameter , double **mean , double **variance , + double **intercept , double **slope , double **autoregressive_coeff , + double **correlation , double **slope_standard_deviation , + double **index_parameter_mean , long double **index_parameter_variance , + double **determination_coeff) const + +{ + int i , j; + double diff , buff; + Test *test; + + + if ((model_type == POISSON_CHANGE) || (model_type == BAYESIAN_POISSON_CHANGE)) { + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << " " << variable << " "; + } + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_MEAN] << ", " + << STAT_label[STATL_VARIANCE] << ": "; + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < nb_segment;j++) { + os << mean[i][j] << " " << variance[i][j]; + if (j < nb_segment - 1) { + os << " | "; + } + else if ((index == I_DEFAULT) && (i < nb_sequence - 1)) { + os << endl; + } + } + } + } + } + + else { + for (i = 0;i < nb_segment;i++) { + os << mean[0][i] << " " << variance[0][i]; + if (variance[1][i] > 0.) 
{ + os << " (" << 100 * variance[2][i] / variance[1][i] << "%, " << 100 * variance[3][i] / variance[1][i] << "%)"; + } + if (i < nb_segment - 1) { + os << " | "; + } + } + } + os << endl; + } + + else if ((model_type == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type == GAUSSIAN_CHANGE) || (model_type == MEAN_CHANGE) || + (model_type == VARIANCE_CHANGE) || (model_type == BAYESIAN_GAUSSIAN_CHANGE)) { + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << " " << variable << " "; + } + + if ((index != I_DEFAULT) || (!common_contrast)) { + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_MEAN] << ", " + << STAT_label[STATL_STANDARD_DEVIATION] << ": "; + + for (i = 0;i < nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < nb_segment;j++) { + os << mean[i][j] << " " << sqrt(variance[i][j]); + if (j < nb_segment - 1) { + os << " | "; + } + else if ((index == I_DEFAULT) && (i < nb_sequence - 1)) { + os << endl; + } + } + } + } + } + + else { + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_MEAN] << ", " + << STAT_label[STATL_STANDARD_DEVIATION]; + if (model_type != VARIANCE_CHANGE) { + os << ", " << STAT_label[STATL_VARIANCE]; + } + os << ": "; + + for (i = 0;i < nb_segment;i++) { + os << mean[0][i] << " " << sqrt(variance[0][i]); + if ((model_type != VARIANCE_CHANGE) && (variance[1][i] > 0.)) { + os << " " << variance[0][i] << " (" << 100 * variance[2][i] / variance[1][i] << "%, " + << 100 * variance[3][i] / variance[1][i] << "%)"; + } + if (i < nb_segment - 1) { + os << " | "; + } + } + } + os << endl; + } + + else if ((model_type == LINEAR_MODEL_CHANGE) || (model_type == INTERCEPT_SLOPE_CHANGE)) { + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << " " << variable << " "; + } + + if ((index == I_DEFAULT) && (!common_contrast)) { + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << ", " + << STAT_label[STATL_SLOPE] << ", " << 
STAT_label[STATL_RESIDUAL] << " " + << STAT_label[STATL_STANDARD_DEVIATION] << ": "; + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_segment;j++) { + os << intercept[i][j] << " " << slope[i][j] << " " << sqrt(variance[i][j]); + if (j < nb_segment - 1) { + os << " | "; + } + } + os << endl; + } + } + + else { + if ((correlation) && (slope_standard_deviation)) { + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << ", " + << STAT_label[STATL_SLOPE] << " (" << SEQ_label[SEQL_CONFIDENCE_INTERVAL] << "), " + << STAT_label[STATL_CORRELATION_COEFF] << " (" << STAT_label[STATL_LIMIT_CORRELATION_COEFF] << "), " + << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ", " + << SEQ_label[SEQL_CHANGE_POINT_AMPLITUDE] << " (" << SEQ_label[SEQL_CONFIDENCE_INTERVALS] << ")" << endl; + + if (index != I_DEFAULT) { + test = new Test(STUDENT , false , change_point[1] - change_point[0] - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] << ", " << slope[index][i]; + if (slope_standard_deviation[index][i] > 0.) 
{ + os << " (" << slope[index][i] - test->value * slope_standard_deviation[index][i] << ", " + << slope[index][i] + test->value * slope_standard_deviation[index][i] << ")"; +// << " (slope_standard_deviation: " << slope_standard_deviation[index][i] << ")"; + } + os << ", " << correlation[index][i] << " (-/+" + << test->value / sqrt(test->value * test->value + change_point[i + 1] - change_point[i] - 2) + << "), " << sqrt(variance[index][i]); + + if (i < nb_segment - 1) { + os << ", " << intercept[index][i + 1] + slope[index][i + 1] * seq_index_parameter[change_point[i + 1]] - + (intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]]) << " ("; + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[index][i]; + buff = test->value * sqrt(variance[index][i] * (1. / (double)(change_point[i + 1] - change_point[i]) + + diff * diff / index_parameter_variance[index][i])); + +// os << test->value << ", "; + os << MAX(intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] - buff , 0) << ", " + << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] + buff << " | "; + + delete test; + + test = new Test(STUDENT , false , change_point[i + 2] - change_point[i + 1] - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[index][i + 1]; + buff = test->value * sqrt(variance[index][i + 1] * (1. 
/ (double)(change_point[i + 2] - change_point[i + 1]) + + diff * diff / index_parameter_variance[index][i + 1])); + os << MAX(intercept[index][i + 1] + slope[index][i + 1] * seq_index_parameter[change_point[i + 1]] - buff , 0) << ", " + << intercept[index][i + 1] + slope[index][i + 1] * seq_index_parameter[change_point[i + 1]] + buff << ")"; + } + os << endl; + } + + delete test; + + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << ": "; + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] << " | "; + } + else { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1] - 1] << endl; + } + } + } + + else if (common_contrast) { + test = new Test(STUDENT , false , nb_sequence * (change_point[1] - change_point[0]) - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] << ", " << slope[0][i]; + if (slope_standard_deviation[0][i] > 0.) { + os << " (" << slope[0][i] - test->value * slope_standard_deviation[0][i] << ", " + << slope[0][i] + test->value * slope_standard_deviation[0][i] << ")"; +// << " (slope_standard_deviation: " << slope_standard_deviation[0][i] << ")"; + } + os << ", " << correlation[0][i] << " (-/+" + << test->value / sqrt(test->value * test->value + nb_sequence * (change_point[i + 1] - change_point[i]) - 2) + << "), " << sqrt(variance[0][i]); + + if (i < nb_segment - 1) { + os << ", " << intercept[0][i + 1] + slope[0][i + 1] * seq_index_parameter[change_point[i + 1]] - + (intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]]) << " ("; + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[0][i]; + buff = test->value * sqrt(variance[0][i] * (1. 
/ (double)(nb_sequence * (change_point[i + 1] - change_point[i])) + + diff * diff / index_parameter_variance[0][i])); + +// os << test->value << ", "; + os << MAX(intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] - buff , 0) << ", " + << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] + buff << " | "; + + delete test; + + test = new Test(STUDENT , false , nb_sequence * (change_point[i + 2] - change_point[i + 1]) - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[0][i + 1]; + buff = test->value * sqrt(variance[0][i + 1] * (1. / (double)(nb_sequence * (change_point[i + 2] - change_point[i + 1])) + + diff * diff / index_parameter_variance[0][i + 1])); + os << MAX(intercept[0][i + 1] + slope[0][i + 1] * seq_index_parameter[change_point[i + 1]] - buff , 0) << ", " + << intercept[0][i + 1] + slope[0][i + 1] * seq_index_parameter[change_point[i + 1]] + buff << ")"; + } + os << endl; + } + + delete test; + + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << ": "; + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] << " | "; + } + else { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1] - 1] << endl; + } + } + } + } + + else { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << ", " + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << ", " + << STAT_label[STATL_SLOPE] << ", " << STAT_label[STATL_RESIDUAL] << " " + << STAT_label[STATL_STANDARD_DEVIATION] << ": "; + + if (index != I_DEFAULT) { + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << 
intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] << " | "; + } + else { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1] - 1]; + } + } + os << " || "; + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] << " " << slope[index][i] << " " << sqrt(variance[index][i]); + if (i < nb_segment - 1) { + os << " | "; + } + } + } + + else if (common_contrast) { + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] << " | "; + } + else { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1] - 1]; + } + } + os << " || "; + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] << " " << slope[0][i] << " " << sqrt(variance[0][i]); + if (i < nb_segment - 1) { + os << " | "; + } + } + } + os << endl; + } + } + } + + else if ((model_type == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + normal dist; + double standard_normal_value = quantile(complement(dist , 0.025)) , standard_error; + + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << " " << variable << " "; + } + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_MEAN] << ", " + << SEQ_label[SEQL_AUTOREGRESSIVE_COEFF] << " (" << SEQ_label[SEQL_CONFIDENCE_INTERVAL] << " | " + << STAT_label[STATL_NULL_AUTOREGRESSIVE_COEFF_95_CONFIDENCE_LIMIT] << "), " + << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION]; + if (determination_coeff) { + os << ", " << STAT_label[STATL_STANDARD_DEVIATION]; + os << ", " << STAT_label[STATL_DETERMINATION_COEFF]; + } + os << endl; + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { +/* if (index == I_DEFAULT) { + os << endl; + } + else { + os << ": "; + 
} */ + + for (j = 0;j < nb_segment;j++) { + standard_error = standard_normal_value * sqrt((1. - autoregressive_coeff[i][j] * autoregressive_coeff[i][j]) / + (change_point[j + 1] - change_point[j])); + os << mean[i][j] << " " << autoregressive_coeff[i][j] << " (" + << MAX(autoregressive_coeff[i][j] - standard_error , -1.) << ", " << MIN(autoregressive_coeff[i][j] + standard_error , 1.) + << " | -/+" << standard_normal_value / sqrt((double)(change_point[j + 1] - change_point[j])) << ") " + << sqrt(variance[i][j]); + if (determination_coeff) { + os << " " << sqrt(variance[i][j] / (1. - determination_coeff[i][j])); + os << " " << determination_coeff[i][j]; + } + os << endl; +/* if (j < nb_segment - 1) { + os << " || "; + } */ + } + } + } + } + + else { +// os << ": "; + for (i = 0;i < nb_segment;i++) { + standard_error = standard_normal_value * sqrt((1. - autoregressive_coeff[0][i] * autoregressive_coeff[0][i]) / + (nb_sequence * (change_point[i + 1] - change_point[i]))); + os << mean[0][i] << " " << autoregressive_coeff[0][i] << " (" + << MAX(autoregressive_coeff[0][i] - standard_error , -1.) << ", " << MIN(autoregressive_coeff[0][i] + standard_error , 1.) + << " | -/+" << standard_normal_value / sqrt((double)nb_sequence * (change_point[i + 1] - change_point[i])) << ") " + << sqrt(variance[0][i]); + if (determination_coeff) { + os << " " << sqrt(variance[0][i] / (1. - determination_coeff[0][i])); + os << " " << determination_coeff[0][i]; + } + os << endl; +/* if (i < nb_segment - 1) { + os << " || "; + } */ + } + } +// os << endl; + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of piecewise linear functions at the spreadsheet format. 
+ * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] variable variable index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model type, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] change_point change points, + * \param[in] seq_index_parameter index parameters, + * \param[in] mean segment means, + * \param[in] variance segment variances or residual variances, + * \param[in] intercept segment intercepts, + * \param[in] slope segment slopes, + * \param[in] autoregressive_coeff segment autoregressive coefficient, + * \param[in] correlation segment correlation coefficients (for linear models), + * \param[in] slope_standard_deviation segment slope standard deviations (for linear models). + * \param[in] index_parameter_mean segment index parameter mean (for linear models), + * \param[in] index_parameter_variance segment index parameter variance (for linear models), + * \param[in] determination_coeff coefficient of determination (for autoregressive models). 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::piecewise_linear_function_spreadsheet_print(ostream &os , int index , int variable , int nb_segment , + segment_model model_type , bool common_contrast , int *change_point , + int *seq_index_parameter , double **mean , double **variance , + double **intercept , double **slope , double **autoregressive_coeff , + double **correlation , double **slope_standard_deviation , + double **index_parameter_mean , long double **index_parameter_variance , + double **determination_coeff) const + +{ + int i , j; + double diff , buff; + Test *test; + + + if ((model_type == POISSON_CHANGE) || (model_type == BAYESIAN_POISSON_CHANGE)) { + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << "\t" << variable << "\t"; + } + os << SEQ_label[SEQL_SEGMENT] << "\t" << STAT_label[STATL_MEAN] << "\t" + << STAT_label[STATL_VARIANCE]; + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < nb_segment;j++) { + os << "\t" << mean[i][j] << "\t" << variance[i][j] << "\t"; + } + if ((index == I_DEFAULT) && (i < nb_sequence - 1)) { + os << "\t"; + } + } + } + } + + else { + for (i = 0;i < nb_segment;i++) { + os << "\t" << mean[0][i] << "\t" << variance[0][i]; + if (variance[1][i] > 0.) 
{ + os << "\t" << 100 * variance[2][i] / variance[1][i] << "%\t" << 100 * variance[3][i] / variance[1][i] << "%"; + } + if (i < nb_segment - 1) { + os << "\t\t"; + } + } + } + os << endl; + } + + else if ((model_type == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type == GAUSSIAN_CHANGE) || (model_type == MEAN_CHANGE) || + (model_type == VARIANCE_CHANGE) || (model_type == BAYESIAN_GAUSSIAN_CHANGE)) { + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << "\t" << variable << "\t"; + } + + if ((index != I_DEFAULT) || (!common_contrast)) { + os << SEQ_label[SEQL_SEGMENT] << "\t" << STAT_label[STATL_MEAN] << "\t" + << STAT_label[STATL_STANDARD_DEVIATION]; + + for (i = 0;i < nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < nb_segment;j++) { + os << "\t" << mean[i][j] << "\t" << sqrt(variance[i][j]) << "\t"; + } + if ((index == I_DEFAULT) && (i < nb_sequence - 1)) { + os << "\t"; + } + } + } + } + + else { + os << SEQ_label[SEQL_SEGMENT] << "\t" << STAT_label[STATL_MEAN] << "\t" + << STAT_label[STATL_STANDARD_DEVIATION]; + if (model_type != VARIANCE_CHANGE) { + os << "\t" << STAT_label[STATL_VARIANCE]; + } + + for (i = 0;i < nb_segment;i++) { + os << "\t" << mean[0][i] << "\t" << sqrt(variance[0][i]); + if ((model_type != VARIANCE_CHANGE) && (variance[1][i] > 0.)) { + os << "\t" << variance[0][i] << "\t" << 100 * variance[2][i] / variance[1][i] << "%\t" + << 100 * variance[3][i] / variance[1][i] << "%"; + } + if (i < nb_segment - 1) { + os << "\t\t"; + } + } + } + os << endl; + } + + else if ((model_type == LINEAR_MODEL_CHANGE) || (model_type == INTERCEPT_SLOPE_CHANGE)) { + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << "\t" << variable << "\t"; + } + + if ((index == I_DEFAULT) && (!common_contrast)) { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << 
STAT_label[STATL_SLOPE] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_RESIDUAL] << " " + << STAT_label[STATL_STANDARD_DEVIATION] << endl; + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_segment;j++) { + os << intercept[i][j] + slope[i][j] * seq_index_parameter[change_point[j]] << "->"; + if (j < nb_segment - 1) { + os << intercept[i][j] + slope[i][j] * seq_index_parameter[change_point[j + 1]] << "\t"; + } + else { + os << intercept[i][j] + slope[i][j] * seq_index_parameter[change_point[j + 1] - 1]; + } + } + for (j = 0;j < nb_segment;j++) { + os << "\t\t" << intercept[i][j] << "\t" << slope[i][j] << "\t" << sqrt(variance[i][j]); + } + os << endl; + } + } + + else { + if ((correlation) && (slope_standard_deviation)) { + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_SLOPE] << "\t" + << SEQ_label[SEQL_CONFIDENCE_INTERVAL] << "\t\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_CORRELATION_COEFF] << "\t" + << STAT_label[STATL_LIMIT_CORRELATION_COEFF] << "\t" << SEQ_label[SEQL_SEGMENT] << " " + << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << "\t" + << SEQ_label[SEQL_CHANGE_POINT_AMPLITUDE] << "\t" << SEQ_label[SEQL_CONFIDENCE_INTERVALS] << endl; + + if (index != I_DEFAULT) { + test = new Test(STUDENT , false , change_point[1] - change_point[0] - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] << "\t" << slope[index][i]; + if (slope_standard_deviation[index][i] > 0.) 
{ + os << "\t" << slope[index][i] - test->value * slope_standard_deviation[index][i] + << "\t" << slope[index][i] + test->value * slope_standard_deviation[index][i]; + } + os << "\t" << correlation[index][i] << "\t-/+" + << test->value / sqrt(test->value * test->value + change_point[i + 1] - change_point[i] - 2) + << "\t" << sqrt(variance[index][i]); + + if (i < nb_segment - 1) { + os << "\t" << intercept[index][i + 1] + slope[index][i + 1] * seq_index_parameter[change_point[i + 1]] - + (intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]]); + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[index][i]; + buff = test->value * sqrt(variance[index][i] * (1. / (double)(change_point[i + 1] - change_point[i]) + + diff * diff / index_parameter_variance[index][i])); + +// os << "\t" << test->value; + os << "\t" << MAX(intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] - buff , 0) + << "\t" << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] + buff; + + delete test; + + test = new Test(STUDENT , false , change_point[i + 2] - change_point[i + 1] - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[index][i + 1]; + buff = test->value * sqrt(variance[index][i + 1] * (1. 
/ (double)(change_point[i + 2] - change_point[i + 1]) + + diff * diff / index_parameter_variance[index][i + 1])); + os << "\t" << MAX(intercept[index][i + 1] + slope[index][i + 1] * seq_index_parameter[change_point[i + 1]] - buff , 0) + << "\t" << intercept[index][i + 1] + slope[index][i + 1] * seq_index_parameter[change_point[i + 1]] + buff; + } + os << endl; + } + + delete test; + + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << "\t"; + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] << "\t"; + } + else { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1] - 1] << endl; + } + } + } + + else if (common_contrast) { + test = new Test(STUDENT , false , nb_sequence * (change_point[1] - change_point[0]) - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] << "\t" << slope[0][i]; + if (slope_standard_deviation[0][i] > 0.) { + os << "\t" << slope[0][i] - test->value * slope_standard_deviation[0][i] + << "\t" << slope[0][i] + test->value * slope_standard_deviation[0][i]; + } + os << "\t" << correlation[0][i] << "\t-/+" + << test->value / sqrt(test->value * test->value + nb_sequence * (change_point[i + 1] - change_point[i]) - 2) + << "\t" << sqrt(variance[0][i]); + + if (i < nb_segment - 1) { + os << "\t" << intercept[0][i + 1] + slope[0][i + 1] * seq_index_parameter[change_point[i + 1]] - + (intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]]); + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[0][i]; + buff = test->value * sqrt(variance[0][i] * (1. 
/ (double)(nb_sequence * (change_point[i + 1] - change_point[i])) + + diff * diff / index_parameter_variance[0][i])); + +// os << "\t" << test->value; + os << "\t" << MAX(intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] - buff , 0) + << "\t" << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] + buff; + + delete test; + + test = new Test(STUDENT , false , nb_sequence * (change_point[i + 2] - change_point[i + 1]) - 2 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + diff = seq_index_parameter[change_point[i + 1]] - index_parameter_mean[0][i + 1]; + buff = test->value * sqrt(variance[0][i + 1] * (1. / (double)(nb_sequence * (change_point[i + 2] - change_point[i + 1])) + + diff * diff / index_parameter_variance[0][i + 1])); + os << "\t" << MAX(intercept[0][i + 1] + slope[0][i + 1] * seq_index_parameter[change_point[i + 1]] - buff , 0) + << "\t" << intercept[0][i + 1] + slope[0][i + 1] * seq_index_parameter[change_point[i + 1]] + buff; + } + os << endl; + } + + delete test; + + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << "\t"; + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i]] << "->"; + if (i < nb_segment - 1) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] << "\t"; + } + else { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1] - 1] << endl; + } + } + } + } + + else { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_SLOPE] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_RESIDUAL] << " " + << STAT_label[STATL_STANDARD_DEVIATION] << "\t"; + + if (index != I_DEFAULT) { + for (i = 0;i < nb_segment;i++) { + os << intercept[index][i] + slope[index][i] * 
seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1]] << "\t"; + } + else { + os << intercept[index][i] + slope[index][i] * seq_index_parameter[change_point[i + 1] - 1]; + } + } + for (i = 0;i < nb_segment;i++) { + os << "\t\t" << intercept[index][i] << "\t" << slope[index][i] << "\t" << sqrt(variance[index][i]); + } + } + + else if (common_contrast) { + for (i = 0;i < nb_segment;i++) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1]] << "\t"; + } + else { + os << intercept[0][i] + slope[0][i] * seq_index_parameter[change_point[i + 1] - 1]; + } + } + for (i = 0;i < nb_segment;i++) { + os << "\t\t" << intercept[0][i] << "\t" << slope[0][i] << "\t" << sqrt(variance[0][i]); + } + } + os << endl; + } + } + } + + else if ((model_type == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + normal dist; + double standard_normal_value = quantile(complement(dist , 0.025)) , standard_error; + + if (nb_variable > 2) { + os << STAT_label[STATL_VARIABLE] << "\t" << variable << "\t"; + } + os << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_MEAN] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << SEQ_label[SEQL_AUTOREGRESSIVE_COEFF] << "\t" + << SEQ_label[SEQL_CONFIDENCE_INTERVAL] << "\t" + << STAT_label[STATL_NULL_AUTOREGRESSIVE_COEFF_95_CONFIDENCE_LIMIT] << "\t" + << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_RESIDUAL] << " " + << STAT_label[STATL_STANDARD_DEVIATION]; + if (determination_coeff) { + os << "\t" << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_STANDARD_DEVIATION]; + os << "\t" << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_DETERMINATION_COEFF]; + } + os << endl; + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { 
+ if ((index == I_DEFAULT) || (index == i)) { +/* if (index == I_DEFAULT) { + os << endl; + } + else { + os << "\t"; + } */ + + for (j = 0;j < nb_segment;j++) { + standard_error = standard_normal_value * sqrt((1. - autoregressive_coeff[i][j] * autoregressive_coeff[i][j]) / + (change_point[j + 1] - change_point[j])); + os << mean[i][j] << "\t" << autoregressive_coeff[i][j] << "\t" + << MAX(autoregressive_coeff[i][j] - standard_error , -1.) << "\t" << MIN(autoregressive_coeff[i][j] + standard_error , 1.) + << "\t-/+" << standard_normal_value / sqrt((double)(change_point[j + 1] - change_point[j])) << "\t" + << sqrt(variance[i][j]); + if (determination_coeff) { + os << "\t" << sqrt(variance[i][j] / (1. - determination_coeff[i][j])); + os << "\t" << determination_coeff[i][j]; + } + os << endl; +/* if (j < nb_segment - 1) { + os << "\t\t"; + } */ + } + } + } + } + + else { +// os << "\t"; + for (i = 0;i < nb_segment;i++) { + standard_error = standard_normal_value * sqrt((1. - autoregressive_coeff[0][i] * autoregressive_coeff[0][i]) / + (nb_sequence * (change_point[i + 1] - change_point[i]))); + os << mean[0][i] << "\t" << autoregressive_coeff[0][i] << "\t" + << MAX(autoregressive_coeff[0][i] - standard_error , -1.) << "\t" << MIN(autoregressive_coeff[0][i] + standard_error , 1.) + << "\t-/+" << standard_normal_value / sqrt((double)nb_sequence * (change_point[i + 1] - change_point[i])) << "\t" + << sqrt(variance[0][i]); + if (determination_coeff) { + os << "\t" << sqrt(variance[0][i] / (1. - determination_coeff[0][i])); + os << "\t" << determination_coeff[0][i]; + } + os << endl; +/* if (i < nb_segment - 1) { + os << "\t\t"; + } */ + } + } +// os << endl; + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of continuous piecewise linear functions. 
+ * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] variable variable index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model type, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] change_point change points, + * \param[in] seq_index_parameter index parameters, + * \param[in] intercept segment intercepts, + * \param[in] slope segment slopes, + * \param[in] corrected_intercept corrected segment intercepts + * \param[in] corrected_slope corrected segment slopes. + */ +/*--------------------------------------------------------------*/ + +double Sequences::continuous_piecewise_linear_function(ostream &os , int index , int variable , int nb_segment , + segment_model model_type , bool common_contrast , + int *change_point , int *seq_index_parameter , + double *intercept , double *slope , + double *corrected_intercept , double *corrected_slope) const + +{ + int i , j , k; + double likelihood , diff , residual_mean , global_variance , *predicted_value , *variance; + long double square_sum , global_square_sum , residual_square_sum , global_residual_square_sum; + + + predicted_value = new double[nb_segment + 1]; + + predicted_value[0] = intercept[0] + slope[0] * seq_index_parameter[0]; + for (i = 1;i < nb_segment;i++) { + predicted_value[i] = (fabs(slope[i - 1]) * (intercept[i - 1] + slope[i - 1] * seq_index_parameter[change_point[i]]) + + fabs(slope[i]) * (intercept[i] + slope[i] * seq_index_parameter[change_point[i]])) / + (fabs(slope[i - 1]) + fabs(slope[i])); + } + predicted_value[nb_segment] = intercept[nb_segment - 1] + slope[nb_segment - 1] * + seq_index_parameter[length[index == I_DEFAULT ? 
0 : index] - 1]; + + for (i = 0;i < nb_segment;i++) { + corrected_slope[i] = (predicted_value[i + 1] - predicted_value[i]) / + (seq_index_parameter[change_point[i + 1]] - seq_index_parameter[change_point[i]]); + corrected_intercept[i] = predicted_value[i] - corrected_slope[i] * seq_index_parameter[change_point[i]]; + } + + variance = new double[nb_segment]; + + likelihood = 0.; + + if (model_type == INTERCEPT_SLOPE_CHANGE) { + global_square_sum = 0.; + global_residual_square_sum = 0.; + } + + if (index != I_DEFAULT) { + for (i = 0;i < nb_segment;i++) { + residual_mean = 0.; + residual_square_sum = 0.; + square_sum = 0.; + + if (change_point[i + 1] - change_point[i] > 2) { + if (type[variable] != REAL_VALUE) { + for (j = change_point[i];j < change_point[i + 1];j++) { + diff = int_sequence[index][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]); + residual_mean += diff; + square_sum += diff * diff; + } + residual_mean /= (change_point[i + 1] - change_point[i]); + + for (j = change_point[i];j < change_point[i + 1];j++) { + diff = int_sequence[index][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]) - residual_mean; + residual_square_sum += diff * diff; + } + } + + else { + for (j = change_point[i];j < change_point[i + 1];j++) { + diff = real_sequence[index][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]); + residual_mean += diff; + square_sum += diff * diff; + } + residual_mean /= (change_point[i + 1] - change_point[i]); + + for (j = change_point[i];j < change_point[i + 1];j++) { + diff = real_sequence[index][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]) - residual_mean; + residual_square_sum += diff * diff; + } + } + + if (model_type == INTERCEPT_SLOPE_CHANGE) { + global_square_sum += square_sum; + global_residual_square_sum += residual_square_sum; + } + +# ifdef DEBUG + cout << "\nTEST " << STAT_label[STATL_RESIDUAL] 
<< " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " + << sqrt(square_sum / (change_point[i + 1] - change_point[i] - 2)) << " " + << sqrt(residual_square_sum / (change_point[i + 1] - change_point[i] - 2)) << " | " << residual_mean << endl; +# endif + + variance[i] = residual_square_sum / (change_point[i + 1] - change_point[i] - 2); + + if ((model_type == LINEAR_MODEL_CHANGE) && (likelihood != D_INF)) { + if (square_sum > (change_point[i + 1] - change_point[i]) * ROUNDOFF_ERROR) { + likelihood -= ((double)(change_point[i + 1] - change_point[i]) / 2.) * (log(square_sum / + (change_point[i + 1] - change_point[i])) + log(2 * M_PI) + 1); + } + else { + likelihood = D_INF; + } + } + } + + else { + variance[i] = 0.; + } + } + + if (model_type == INTERCEPT_SLOPE_CHANGE) { + if (global_square_sum > length[index] * ROUNDOFF_ERROR) { + likelihood -= ((double)length[index] / 2.) * (log(global_square_sum / length[index]) + + log(2 * M_PI) + 1); + +# ifdef DEBUG + cout << "\nTEST " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " + << sqrt(global_square_sum / (length[index] - 2 * nb_segment)) << " " + << sqrt(global_residual_square_sum / (length[index] - 2 * nb_segment)) << endl; +# endif + + } + else { + likelihood = D_INF; + } + } + } + + else if (common_contrast) { + for (i = 0;i < nb_segment;i++) { + residual_mean = 0.; + residual_square_sum = 0.; + square_sum = 0.; + + if (change_point[i + 1] - change_point[i] > 2) { + if (type[variable] != REAL_VALUE) { + for (j = change_point[i];j < change_point[i + 1];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]); + residual_mean += diff; + square_sum += diff * diff; + } + } + residual_mean /= (change_point[i + 1] - change_point[i]); + + for (j = change_point[i];j < change_point[i + 1];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][variable][j] - (corrected_intercept[i] + 
corrected_slope[i] * seq_index_parameter[j]) - residual_mean; + residual_square_sum += diff * diff; + } + } + } + + else { + for (j = change_point[i];j < change_point[i + 1];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]); + residual_mean += diff; + square_sum += diff * diff; + } + } + residual_mean /= (change_point[i + 1] - change_point[i]); + + for (j = change_point[i];j < change_point[i + 1];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][variable][j] - (corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[j]) - residual_mean; + residual_square_sum += diff * diff; + } + } + } + + if (model_type == INTERCEPT_SLOPE_CHANGE) { + global_square_sum += square_sum; + global_residual_square_sum += residual_square_sum; + } + + variance[i] = residual_square_sum / (nb_sequence * (change_point[i + 1] - change_point[i]) - 2); + +# ifdef DEBUG + cout << "\nTEST " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " + << sqrt(square_sum / (nb_sequence * (change_point[i + 1] - change_point[i]) - 2)) << " " + << sqrt(residual_square_sum / (nb_sequence * (change_point[i + 1] - change_point[i]) - 2)) << " | " << residual_mean << endl; +# endif + + if ((model_type == LINEAR_MODEL_CHANGE) && (likelihood != D_INF)) { + if (square_sum > nb_sequence * (change_point[i + 1] - change_point[i]) * ROUNDOFF_ERROR) { + likelihood -= ((double)(nb_sequence * (change_point[i + 1] - change_point[i])) / 2.) * (log(square_sum / + (nb_sequence * (change_point[i + 1] - change_point[i]))) + log(2 * M_PI) + 1); + } + else { + likelihood = D_INF; + } + } + } + + else { + variance[i] = 0.; + } + } + + if (model_type == INTERCEPT_SLOPE_CHANGE) { + if (global_square_sum > nb_sequence * length[0] * ROUNDOFF_ERROR) { + likelihood -= ((double)(nb_sequence * length[0]) / 2.) 
* (log(global_square_sum / + (nb_sequence * length[0])) + log(2 * M_PI) + 1); + +# ifdef DEBUG + cout << "\nTEST " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " + << sqrt(global_square_sum / (nb_sequence * length[0] - 2 * nb_segment)) << " " + << sqrt(global_residual_square_sum / (nb_sequence * length[0] - 2 * nb_segment)) << endl; +# endif + + } + else { + likelihood = D_INF; + } + } + } + + os << "\n" << SEQ_label[SEQL_SEGMENT] << " " << STAT_label[STATL_INTERCEPT] << ", " + << STAT_label[STATL_SLOPE]; + if (model_type == LINEAR_MODEL_CHANGE) { + os << ", " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION]; + } + os << ": "; + for (i = 0;i < nb_segment;i++) { + os << corrected_intercept[i] << " " << corrected_slope[i]; + if (model_type == LINEAR_MODEL_CHANGE) { + os << " " << sqrt(variance[i]); + } + if (i < nb_segment - 1) { + os << " | "; + } + } + + if (model_type == INTERCEPT_SLOPE_CHANGE) { + os << " " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " << sqrt(global_variance); + } + os << endl; + + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << ": "; + for (i = 0;i < nb_segment;i++) { + os << corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[change_point[i]] << " -> "; + if (i < nb_segment - 1) { + os << corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[change_point[i + 1]] << " | "; + } + else { + os << corrected_intercept[i] + corrected_slope[i] * seq_index_parameter[change_point[i + 1] - 1] << endl; + } + } + + delete [] predicted_value; + delete [] variance; + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Output of a segmentation of a single sequence or a sample of sequences. 
+ * + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] os stream for displaying the segmentation, + * \param[in] output output type (sequence or residuals), + * \param[in] ichange_point change points, + * \param[in] continuity flag continuous piecewise linear function. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation_output(int nb_segment , segment_model *model_type , + bool common_contrast , ostream *os , sequence_type output , + int *ichange_point , bool continuity) + +{ + bool *piecewise_function_flag; + int i , j , k , m , n; + int inb_variable , min_identifier , max_identifier , *iidentifier , *ilength , *change_point , + *seq_index_parameter = NULL; + variable_nature *itype; + double likelihood , corrected_likelihood , diff , buff , change_point_amplitude , mean_absolute_deviation , + *global_variance , ***mean , ***variance , ***index_parameter_mean , ***intercept , ***slope , + ***correlation , ***slope_standard_deviation , ***corrected_intercept , ***corrected_slope , + ***autoregressive_coeff , ***determination_coeff; + long double ***index_parameter_variance; + Test *test; + Sequences *seq; + + + if (ichange_point) { + change_point = ichange_point; + } + + else { + change_point = new int[nb_segment + 1]; + + change_point[0] = 0; + i = 1; + for (j = 1;j < length[0];j++) { + if (int_sequence[0][0][j] != int_sequence[0][0][j - 1]) { + change_point[i++] = j; + } + } + change_point[i] = length[0]; + } + + mean = new double**[nb_variable]; + variance = new double**[nb_variable]; + intercept = new double**[nb_variable]; + slope = new double**[nb_variable]; + correlation = new double**[nb_variable]; + slope_standard_deviation = new double**[nb_variable]; + index_parameter_mean = new double**[nb_variable]; + index_parameter_variance 
= new long double**[nb_variable]; + autoregressive_coeff = new double**[nb_variable]; + determination_coeff = new double**[nb_variable]; + + global_variance = NULL; + + if (continuity) { + corrected_intercept = new double**[nb_variable]; + corrected_slope = new double**[nb_variable]; + } + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[length[0]]; + for (j = 0;j < length[0];j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[0]; + } + + for (i = 1;i < nb_variable;i++) { + mean[i] = NULL; + intercept[i] = NULL; + slope[i] = NULL; + correlation[i] = NULL; + slope_standard_deviation[i] = NULL; + + if (continuity) { + corrected_intercept[i] = NULL; + corrected_slope[i] = NULL; + } + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if (common_contrast) { + mean[i] = new double*[1]; + mean[i][0] = new double[nb_segment]; + variance[i] = new double*[4]; + for (j = 0;j < 4;j++) { + variance[i][j] = new double[nb_segment]; + } + } + + else { + mean[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + mean[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + } + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if (common_contrast) { + intercept[i] = new double*[1]; + intercept[i][0] = new double[nb_segment]; + slope[i] = new double*[1]; + slope[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + correlation[i] = new double*[1]; + correlation[i][0] = new double[nb_segment]; + 
slope_standard_deviation[i] = new double*[1]; + slope_standard_deviation[i][0] = new double[nb_segment]; + index_parameter_mean[i] = new double*[1]; + index_parameter_mean[i][0] = new double[nb_segment]; + index_parameter_variance[i] = new long double*[1]; + index_parameter_variance[i][0] = new long double[nb_segment]; + + if (continuity) { + corrected_intercept[i] = new double*[1]; + corrected_intercept[i][0] = new double[nb_segment]; + corrected_slope[i] = new double*[1]; + corrected_slope[i][0] = new double[nb_segment]; + } + } + + else { + intercept[i] = new double*[nb_sequence]; + slope[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + correlation[i] = new double*[nb_sequence]; + slope_standard_deviation[i] = new double*[nb_sequence]; + index_parameter_mean[i] = new double*[nb_sequence]; + index_parameter_variance[i] = new long double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + intercept[i][j] = new double[nb_segment]; + slope[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + correlation[i][j] = new double[nb_segment]; + slope_standard_deviation[i][j] = new double[nb_segment]; + index_parameter_mean[i][j] = new double[nb_segment]; + index_parameter_variance[i][j] = new long double[nb_segment]; + } + } + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if (common_contrast) { + mean[i] = new double*[1]; + mean[i][0] = new double[nb_segment]; + autoregressive_coeff[i] = new double*[1]; + autoregressive_coeff[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + determination_coeff[i] = new double*[1]; + determination_coeff[i][0] = new double[nb_segment]; + } + + else { + mean[i] = new double*[nb_sequence]; + autoregressive_coeff[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + determination_coeff[i] = new double*[nb_sequence]; + + for 
(j = 0;j < nb_sequence;j++) { + mean[i][j] = new double[nb_segment]; + autoregressive_coeff[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + determination_coeff[i][j] = new double[nb_segment]; + } + } + } + + if ((((i == 1) && ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE))) || + (model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) && (!global_variance)) { + global_variance = new double[nb_variable]; + } + } + + if (output == SEQUENCE) { + piecewise_function_flag = new bool[nb_variable]; + + piecewise_function_flag[0] = false; + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[0] == INTERCEPT_SLOPE_CHANGE) || (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + piecewise_function_flag[i] = true; + } + else { + piecewise_function_flag[i] = false; + } + } + + seq = new Sequences(*this , piecewise_function_flag); + } + + else if (output == SEQUENCE_SAMPLE) { + iidentifier = new int[nb_sequence + 2]; + + min_identifier = identifier[0]; + for (i = 0;i < nb_sequence;i++) { + if (identifier[i] < min_identifier) { + min_identifier = identifier[i]; + } + + iidentifier[i + 1] = identifier[i]; + } + + iidentifier[0] = min_identifier - 1; + + max_identifier = identifier[nb_sequence - 1]; + for (i = 0;i < nb_sequence - 1;i++) { + if (identifier[i] > max_identifier) { + 
max_identifier = identifier[i]; + } + } + + iidentifier[nb_sequence + 1] = max_identifier + 1; + + ilength = new int[nb_sequence + 2]; + for (i = 0;i < nb_sequence + 2;i++) { + ilength[i] = length[0]; + } + + itype = new variable_nature[nb_variable - 1]; + for (i = 0;i < nb_variable - 1;i++) { + itype[i] = REAL_VALUE; + } + + seq = new Sequences(nb_sequence + 2 , iidentifier , ilength , NULL , + index_param_type , nb_variable - 1 , itype); + delete [] iidentifier; + delete [] ilength; + delete [] itype; + } + + else if (output == ABSOLUTE_RESIDUAL) { + inb_variable = 1 + 2 * (nb_variable - 1); + itype = new variable_nature[inb_variable]; + + itype[0] = type[0]; + i = 1; + for (j = 1;j < nb_variable;j++) { + itype[i++] = REAL_VALUE; + itype[i++] = AUXILIARY; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , inb_variable , itype); + delete [] itype; + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + likelihood = piecewise_linear_function((nb_sequence == 1 ? 
0 : I_DEFAULT) , i , nb_segment , + model_type[i - 1] , common_contrast , change_point , + seq_index_parameter , NULL , mean[i] , variance[i] , + global_variance , intercept[i] , slope[i] , + autoregressive_coeff[i] , correlation[i] , + slope_standard_deviation[i] , index_parameter_mean[i] , + index_parameter_variance[i] , determination_coeff[i]); + + if ((os) && (((i == 1) && ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE))) || + (model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE))) { + *os << "\n2 * " << STAT_label[STATL_LIKELIHOOD] << ": " << 2 * likelihood << endl; + } + } + } + + if (os) { + if (!ichange_point) { + *os << (nb_segment == 2 ? SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_CHANGE_POINTS]) << ": "; + + for (i = 1;i < nb_segment;i++) { + *os << seq_index_parameter[change_point[i]]; + if (i < nb_segment - 1) { + *os << ", "; + } + } + } + + if ((index_interval) && (index_interval->variance > 0.)) { + if (!ichange_point) { + *os << " "; + } + *os << SEQ_label[SEQL_SEGMENT_SAMPLE_SIZE] << ": "; + for (i = 0;i < nb_segment;i++) { + *os << nb_sequence * (change_point[i + 1] - change_point[i]); + if (i < nb_segment - 1) { + *os << ", "; + } + } + *os << endl; + } + + else if (!ichange_point) { + *os << endl; + } + + if (nb_variable > 2) { + *os << "\n"; + } + + for (i = 1;i < nb_variable;i++) { + piecewise_linear_function_ascii_print(*os , (nb_sequence == 1 ? 
0 : I_DEFAULT) , i , nb_segment , model_type[i - 1] , + common_contrast , change_point , seq_index_parameter , + mean[i] , variance[i] , intercept[i] , slope[i] , + autoregressive_coeff[i] , correlation[i] , slope_standard_deviation[i] , + index_parameter_mean[i] , index_parameter_variance[i] , determination_coeff[i]); + + if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[0] == MEAN_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if (nb_segment > 1) { + change_point_amplitude = 0.; + + if ((nb_sequence == 1) || (common_contrast)) { + for (j = 1;j < nb_segment;j++) { + change_point_amplitude += fabs(mean[i][0][j] - mean[i][0][j - 1]); + } + change_point_amplitude /= (nb_segment - 1); + } + + else { + for (j = 0;j < nb_sequence;j++) { + for (k = 1;k < nb_segment;k++) { + change_point_amplitude += fabs(mean[i][j][k] - mean[i][j][k - 1]); + } + } + change_point_amplitude /= (nb_sequence * (nb_segment - 1)); + } + + *os << STAT_label[STATL_MEAN] << " " << SEQ_label[SEQL_CHANGE_POINT_AMPLITUDE] << ": " + << change_point_amplitude << " "; + } + + *os << SEQ_label[SEQL_ROOT_MEAN_SQUARE_ERROR] << ": " << sqrt(global_variance[i]); + if (nb_segment > 1) { + *os << " " << STAT_label[STATL_RATIO] << ": " + << change_point_amplitude / sqrt(global_variance[i]); + } + *os << endl; + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + *os << SEQ_label[SEQL_ROOT_MEAN_SQUARE_ERROR] << ": " << sqrt(global_variance[i]) << endl; + } + + else if (model_type[0] == MEAN_CHANGE) { + *os << SEQ_label[SEQL_GLOBAL_STANDARD_DEVIATION] << ": " << sqrt(global_variance[i]) << endl; + } + else if (model_type[0] == INTERCEPT_SLOPE_CHANGE) { + *os << SEQ_label[SEQL_GLOBAL_RESIDUAL_STANDARD_DEVIATION] << ": " << sqrt(global_variance[i]) << endl; + } + + if (continuity) { + corrected_likelihood = continuous_piecewise_linear_function(*os , 
(nb_sequence == 1 ? 0 : I_DEFAULT) , i , + nb_segment , model_type[i - 1] , common_contrast , + change_point , seq_index_parameter , intercept[i][0] , + slope[i][0] , corrected_intercept[i][0] , corrected_slope[i][0]); + + *os << "2 * " << STAT_label[STATL_LIKELIHOOD] << ": " + << 2 * corrected_likelihood << " | " << 2 * likelihood << endl; + } + } + } + + switch (output) { + + case SEQUENCE : { + if (common_contrast) { + for (i = 0;i < nb_sequence;i++) { + j = 1; + for (k = 1;k < nb_variable;k++) { + j++; + if (piecewise_function_flag[k]) { + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + if (continuity) { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = corrected_intercept[k][0][m] + corrected_slope[k][0][m] * seq_index_parameter[n]; + } + } + } + + else{ + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = intercept[k][0][m] + slope[k][0][m] * seq_index_parameter[n]; + } + } + } + } + + else if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if (type[k] != REAL_VALUE) { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = mean[k][0][m]; + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = mean[k][0][m] + autoregressive_coeff[k][0][m] * (int_sequence[i][k][n - 1] - mean[k][0][m]); + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = mean[k][0][m]; + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = mean[k][0][m] + autoregressive_coeff[k][0][m] * (real_sequence[i][k][n - 1] - mean[k][0][m]); + } + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + 
seq->real_sequence[i][j][n] = mean[k][0][m]; + } + } + } + + j++; + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + j = 1; + for (k = 1;k < nb_variable;k++) { + j++; + if (piecewise_function_flag[k]) { + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + if (continuity) { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = corrected_intercept[k][i][m] + corrected_slope[k][i][m] * seq_index_parameter[n]; + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = intercept[k][i][m] + slope[k][i][m] * seq_index_parameter[n]; + } + } + } + } + + else if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if (type[k] != REAL_VALUE) { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = mean[k][i][m]; + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = mean[k][i][m] + autoregressive_coeff[k][i][m] * (int_sequence[i][k][n - 1] - mean[k][i][m]); + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = mean[k][i][m]; + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = mean[k][i][m] + autoregressive_coeff[k][i][m] * (real_sequence[i][k][n - 1] - mean[k][i][m]); + } + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = mean[k][i][m]; + } + } + } + j++; + } + } + } + } + break; + } + + case SEQUENCE_SAMPLE : { + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? 
length[0] + 1 : length[0]);j++) { + seq->index_parameter[i][j] = index_parameter[0][j]; + } + } + + seq->build_index_parameter_frequency_distribution(); + seq->index_interval_computation(); + } + + // copy of sequences + + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + seq->real_sequence[i + 1][j - 1][k] = int_sequence[i][j][k]; + } + } + + else { + for (k = 0;k < length[i];k++) { + seq->real_sequence[i + 1][j - 1][k] = real_sequence[i][j][k]; + } + } + } + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[i - 1] == INTERCEPT_SLOPE_CHANGE)) { + if (continuity) { + for (j = 0;j < nb_segment;j++) { + for (k = change_point[j];k < change_point[j + 1];k++) { + seq->real_sequence[0][i - 1][k] = corrected_intercept[i][0][j] + corrected_slope[i][0][j] * seq_index_parameter[k]; + } + } + } + + else{ + for (j = 0;j < nb_segment;j++) { + for (k = change_point[j];k < change_point[j + 1];k++) { + seq->real_sequence[0][i - 1][k] = intercept[i][0][j] + slope[i][0][j] * seq_index_parameter[k]; + } + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + for (k = change_point[j];k < change_point[j + 1];k++) { + seq->real_sequence[0][i - 1][k] = mean[i][0][j]; + } + } + } + + if ((model_type[i - 1] == MEAN_CHANGE) || (model_type[i - 1] == INTERCEPT_SLOPE_CHANGE)) { + buff = sqrt(global_variance[0]); + for (j = 0;j < length[i];j++) { + seq->real_sequence[nb_sequence + 1][i - 1][j] = buff; + } + } + + else { + for (j = 0;j < nb_segment;j++) { + buff = sqrt(variance[i][0][j]); + for (k = change_point[j];k < change_point[j + 1];k++) { + seq->real_sequence[nb_sequence + 1][i - 1][k] = buff; + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_histogram(i); + } + break; + } + + // residual computation + + case SUBTRACTION_RESIDUAL : { + if 
(common_contrast) { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + real_sequence[i][j] = new double[length[i]]; + + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] - (intercept[j][0][k] + slope[j][0][k] * seq_index_parameter[m]); + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + real_sequence[i][j][change_point[k]] = int_sequence[i][j][change_point[k]] - mean[j][0][k]; + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] - (mean[j][0][k] + autoregressive_coeff[j][0][k] * + (int_sequence[i][j][m - 1] - mean[j][0][k])); + } + } + } + + else { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] - mean[j][0][k]; + } + } + } + + delete [] int_sequence[i][j]; + int_sequence[i][j] = NULL; + } + + else { + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] -= (intercept[j][0][k] + slope[j][0][k] * seq_index_parameter[m]); + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k + 1] - 1;m > change_point[k];m--) { + real_sequence[i][j][m] -= (mean[j][0][k] + autoregressive_coeff[j][0][k] * + (real_sequence[i][j][m - 1] - mean[j][0][k])); + } + real_sequence[i][j][change_point[k]] -= mean[j][0][k]; + } + } + + else { + for (k = 0;k < 
nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] -= mean[j][0][k]; + } + } + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + real_sequence[i][j] = new double[length[i]]; + + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] - (intercept[j][i][k] + slope[j][i][k] * seq_index_parameter[m]); + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + real_sequence[i][j][change_point[k]] = int_sequence[i][j][change_point[k]] - mean[j][i][k]; + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] - (mean[j][i][k] + autoregressive_coeff[j][i][k] * + (int_sequence[i][j][m - 1] - mean[j][i][k])); + } + } + } + + else { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] - mean[j][i][k]; + } + } + } + + delete [] int_sequence[i][j]; + int_sequence[i][j] = NULL; + } + + else { + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] -= (intercept[j][i][k] + slope[j][i][k] * seq_index_parameter[m]); + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k + 1] - 1;m > change_point[k];m--) { + real_sequence[i][j][m] -= (mean[j][i][k] + autoregressive_coeff[j][i][k] * + 
(real_sequence[i][j][m - 1] - mean[j][i][k])); + } + real_sequence[i][j][change_point[k]] -= mean[j][i][k]; + } + } + + else { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] -= mean[j][i][k]; + } + } + } + } + } + } + } + break; + } + + case ABSOLUTE_RESIDUAL : { + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + seq->int_sequence[i][0][j] = int_sequence[i][0][j]; + } + } + + if (common_contrast) { + for (i = 0;i < nb_sequence;i++) { + j = 1; + for (k = 1;k < nb_variable;k++) { + if (type[k] != REAL_VALUE) { + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(int_sequence[i][k][n] - (intercept[k][0][m] + slope[k][0][m] * seq_index_parameter[n])); + } + } + } + + else if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = fabs(int_sequence[i][k][change_point[m]] - mean[k][0][m]); + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(int_sequence[i][k][n] - (mean[k][0][m] + autoregressive_coeff[k][0][m] * + 
(int_sequence[i][k][n - 1] - mean[k][0][m]))); + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(int_sequence[i][k][n] - mean[k][0][m]); + } + } + } + } + + else { + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(real_sequence[i][k][n] - (intercept[k][0][m] + slope[k][0][m] * seq_index_parameter[n])); + } + } + } + + else if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = fabs(real_sequence[i][k][change_point[m]] - mean[k][0][m]); + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(real_sequence[i][k][n] - (mean[k][0][m] + autoregressive_coeff[k][0][m] * + (real_sequence[i][k][n - 1] - mean[k][0][m]))); + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(real_sequence[i][k][n] - mean[k][0][m]); + } + } + } + } + + j += 2; + } + } + + i = 1; + for (j = 1;j < nb_variable;j++) { + for (k = 0;k < nb_segment;k++) { + mean_absolute_deviation = 0.; + + if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + for (n = 0;n < nb_sequence;n++) { + mean_absolute_deviation += seq->real_sequence[n][i][m]; + } + } +// mean_absolute_deviation /= (nb_sequence * (change_point[k + 1] - change_point[k] - 1) - 2); + mean_absolute_deviation /= (nb_sequence * (change_point[k + 1] - change_point[k] - 1) - 1); + } + + else { + for (m = change_point[k];m < change_point[k + 1];m++) { + for 
(n = 0;n < nb_sequence;n++) { + mean_absolute_deviation += seq->real_sequence[n][i][m]; + } + } + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + mean_absolute_deviation /= (nb_sequence * (change_point[k + 1] - change_point[k]) - 2); + } + else { + mean_absolute_deviation /= (nb_sequence * (change_point[k + 1] - change_point[k]) - 1); + } + } + + for (m = change_point[k];m < change_point[k + 1];m++) { + for (n = 0;n < nb_sequence;n++) { + seq->real_sequence[n][i + 1][m] = mean_absolute_deviation; + } + } + } + + i += 2; + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + j = 1; + for (k = 1;k < nb_variable;k++) { + if (type[k] != REAL_VALUE) { + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(int_sequence[i][k][n] - (intercept[k][i][m] + slope[k][i][m] * seq_index_parameter[n])); + } + } + } + + else if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = fabs(int_sequence[i][k][change_point[m]] - mean[k][i][m]); + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(int_sequence[i][k][n] - (mean[k][i][m] + autoregressive_coeff[k][i][m] * + (int_sequence[i][k][n - 1] - mean[k][i][m]))); + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(int_sequence[i][k][n] - mean[k][i][m]); + } + } + } + } + + else { + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = 
fabs(real_sequence[i][k][n] - (intercept[k][i][m] + slope[k][i][m] * seq_index_parameter[n])); + } + } + } + + else if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (m = 0;m < nb_segment;m++) { + seq->real_sequence[i][j][change_point[m]] = fabs(real_sequence[i][k][change_point[m]] - mean[k][i][m]); + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(real_sequence[i][k][n] - (mean[k][i][m] + autoregressive_coeff[k][i][m] * + (real_sequence[i][k][n - 1] - mean[k][i][m]))); + } + } + } + + else { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j][n] = fabs(real_sequence[i][k][n] - mean[k][i][m]); + } + } + } + } + + for (m = 0;m < nb_segment;m++) { + mean_absolute_deviation = 0.; + + if ((model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (n = change_point[m] + 1;n < change_point[m + 1];n++) { + mean_absolute_deviation += seq->real_sequence[i][j][n]; + } +// mean_absolute_deviation /= (change_point[m + 1] - change_point[m] - 3); + mean_absolute_deviation /= (change_point[m + 1] - change_point[m] - 2); + } + + else { + for (n = change_point[m];n < change_point[m + 1];n++) { + mean_absolute_deviation += seq->real_sequence[i][j][n]; + } + if ((model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[k - 1] == INTERCEPT_SLOPE_CHANGE)) { + mean_absolute_deviation /= (change_point[m + 1] - change_point[m] - 2); + } + else { + mean_absolute_deviation /= (change_point[m + 1] - change_point[m] - 1); + } + } + + for (n = change_point[m];n < change_point[m + 1];n++) { + seq->real_sequence[i][j + 1][n] = mean_absolute_deviation; + } + } + + j += 2; + } + } + } + break; + } + + case DIVISION_RESIDUAL : { + if (common_contrast) { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_variable;j++) { + if (type[j] != 
REAL_VALUE) { + real_sequence[i][j] = new double[length[i]]; + + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + if (intercept[j][0][k] + slope[j][0][k] * seq_index_parameter[m] != 0.) { + real_sequence[i][j][m] = int_sequence[i][j][m] / (intercept[j][0][k] + slope[j][0][k] * seq_index_parameter[m]); + } + else { + real_sequence[i][j][m] = int_sequence[i][j][m]; + } + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + if (mean[j][0][k] != 0.) { + real_sequence[i][j][change_point[k]] = int_sequence[i][j][change_point[k]] / mean[j][0][k]; + } + else { + real_sequence[i][j][change_point[k]] = int_sequence[i][j][change_point[k]]; + } + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + if (mean[j][0][k] + autoregressive_coeff[j][0][k] * (int_sequence[i][j][m - 1] - mean[j][0][k]) != 0.) { + real_sequence[i][j][m] = int_sequence[i][j][m] / (mean[j][0][k] + autoregressive_coeff[j][0][k] * + (int_sequence[i][j][m - 1] - mean[j][0][k])); + } + else { + real_sequence[i][j][m] = int_sequence[i][j][m]; + } + } + } + } + + else { + for (k = 0;k < nb_segment;k++) { + if (mean[j][0][k] != 0.) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] / mean[j][0][k]; + } + } + else { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m]; + } + } + } + } + + delete [] int_sequence[i][j]; + int_sequence[i][j] = NULL; + } + + else { + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + if (intercept[j][0][k] + slope[j][0][k] * seq_index_parameter[m] != 0.) 
{ + real_sequence[i][j][m] /= (intercept[j][0][k] + slope[j][0][k] * seq_index_parameter[m]); + } + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k + 1] - 1;m > change_point[k];m--) { + if (mean[j][0][k] + autoregressive_coeff[j][0][k] * (real_sequence[i][j][m - 1] - mean[j][0][k]) != 0.) { + real_sequence[i][j][m] /= (mean[j][0][k] + autoregressive_coeff[j][0][k] * (real_sequence[i][j][m - 1] - mean[j][0][k])); + } + } + if (mean[j][0][k] != 0.) { + real_sequence[i][j][change_point[k]] /= mean[j][0][k]; + } + + } + } + + else { + for (k = 0;k < nb_segment;k++) { + if (mean[j][0][k] != 0.) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] /= mean[j][0][k]; + } + } + } + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + real_sequence[i][j] = new double[length[i]]; + + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + if (intercept[j][i][k] + slope[j][i][k] * seq_index_parameter[m] != 0.) { + real_sequence[i][j][m] = int_sequence[i][j][m] / (intercept[j][i][k] + slope[j][i][k] * seq_index_parameter[m]); + } + else { + real_sequence[i][j][m] = int_sequence[i][j][m]; + } + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + if (mean[j][i][k] != 0.) 
{ + real_sequence[i][j][change_point[k]] = int_sequence[i][j][change_point[k]] / mean[j][i][k]; + } + else { + real_sequence[i][j][change_point[k]] = int_sequence[i][j][change_point[k]]; + } + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + if (mean[j][i][k] + autoregressive_coeff[j][i][k] * (int_sequence[i][j][m - 1] - mean[j][i][k]) != 0.) { + real_sequence[i][j][m] = int_sequence[i][j][m] / (mean[j][i][k] + autoregressive_coeff[j][i][k] * + (int_sequence[i][j][m - 1] - mean[j][i][k])); + } + else { + real_sequence[i][j][m] = int_sequence[i][j][m]; + } + } + } + } + + else { + for (k = 0;k < nb_segment;k++) { + if (mean[j][i][k] != 0.) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m] / mean[j][i][k]; + } + } + else { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] = int_sequence[i][j][m]; + } + } + } + } + + delete [] int_sequence[i][j]; + int_sequence[i][j] = NULL; + } + + else { + if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == INTERCEPT_SLOPE_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k];m < change_point[k + 1];m++) { + if (intercept[j][i][k] + slope[j][i][k] * seq_index_parameter[m] != 0.) { + real_sequence[i][j][m] /= (intercept[j][i][k] + slope[j][i][k] * seq_index_parameter[m]); + } + } + } + } + + else if ((model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + for (k = 0;k < nb_segment;k++) { + for (m = change_point[k + 1] - 1;m > change_point[k];m--) { + if (mean[j][i][k] + autoregressive_coeff[j][i][k] * (real_sequence[i][j][m - 1] - mean[j][i][k]) != 0.) { + real_sequence[i][j][m] /= (mean[j][i][k] + autoregressive_coeff[j][i][k] * + (real_sequence[i][j][m - 1] - mean[j][i][k])); + } + } + } + if (mean[j][i][change_point[k]] != 0.) 
{ + real_sequence[i][j][change_point[k]] /= mean[j][i][k]; + } + } + + else { + for (k = 0;k < nb_segment;k++) { + if (mean[j][i][k] != 0.) { + for (m = change_point[k];m < change_point[k + 1];m++) { + real_sequence[i][j][m] /= mean[j][i][k]; + } + } + } + } + } + } + } + } + break; + } + } + + if (!ichange_point) { + delete [] change_point; + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if (common_contrast) { + delete [] mean[i][0]; + for (j = 0;j < 4;j++) { + delete [] variance[i][j]; + } + } + else{ + for (j = 0;j < nb_sequence;j++) { + delete [] mean[i][j]; + delete [] variance[i][j]; + } + } + + delete [] mean[i]; + delete [] variance[i]; + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if (common_contrast) { + delete [] intercept[i][0]; + delete [] slope[i][0]; + delete [] variance[i][0]; + delete [] correlation[i][0]; + delete [] slope_standard_deviation[i][0]; + delete [] index_parameter_mean[i][0]; + delete [] index_parameter_variance[i][0]; + + if (continuity) { + delete [] corrected_intercept[i][0]; + delete [] corrected_intercept[i]; + delete [] corrected_slope[i][0]; + delete [] corrected_slope[i]; + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + delete [] intercept[i][j]; + delete [] slope[i][j]; + delete [] variance[i][j]; + delete [] correlation[i][j]; + delete [] slope_standard_deviation[i][j]; + delete [] index_parameter_mean[i][j]; + delete [] index_parameter_variance[i][j]; + } + } + + delete [] intercept[i]; + delete [] slope[i]; + delete [] variance[i]; + delete [] correlation[i]; + delete [] 
slope_standard_deviation[i]; + delete [] index_parameter_mean[i]; + delete [] index_parameter_variance[i]; + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if (common_contrast) { + delete [] mean[i][0]; + delete [] autoregressive_coeff[i][0]; + delete [] variance[i][0]; + delete [] determination_coeff[i][0]; + } + + else { + for (j = 0;j < nb_sequence;j++) { + delete [] mean[i][j]; + delete [] autoregressive_coeff[i][j]; + delete [] variance[i][j]; + delete [] determination_coeff[i][j]; + } + } + + delete [] mean[i]; + delete [] autoregressive_coeff[i]; + delete [] variance[i]; + delete [] determination_coeff[i]; + } + } + + delete [] mean; + delete [] variance; + delete [] global_variance; + delete [] intercept; + delete [] slope; + delete [] correlation; + delete [] slope_standard_deviation; + delete [] index_parameter_mean; + delete [] index_parameter_variance; + delete [] autoregressive_coeff; + delete [] determination_coeff; + + if (continuity) { + delete [] corrected_intercept; + delete [] corrected_slope; + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + if (output == SEQUENCE) { + delete [] piecewise_function_flag; + } + + if ((output == SUBTRACTION_RESIDUAL) || (output == DIVISION_RESIDUAL)) { + for (i = 1;i < nb_variable;i++) { + type[i] = REAL_VALUE; + } + + seq = this; + } + + if (output == SEQUENCE) { + for (i = 1;i < seq->nb_variable;i++) { + if (seq->type[i] == AUXILIARY) { + seq->min_value_computation(i); + seq->max_value_computation(i); + } + } + } + + else if (output == ABSOLUTE_RESIDUAL) { + for (i = 1;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + if (seq->type[i] != AUXILIARY) { + seq->build_marginal_histogram(i); + } + } + } + + else { + for (i = 1;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + if 
(seq->marginal_distribution[i]) { + delete seq->marginal_distribution[i]; + seq->marginal_distribution[i] = NULL; + } + + seq->build_marginal_histogram(i); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Segmentation of a single sequence or a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the segmentation, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] ichange_point change points, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output (sequence or residuals), + * \param[in] continuity flag continuous piecewise linear function. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation(StatError &error , ostream *os , int iidentifier , + int nb_segment , int *ichange_point , segment_model *model_type , + bool common_contrast , double *shape_parameter , + sequence_type output , bool continuity) const + +{ + bool status = true; + int i , j , k , m; + int index , segmentation_index , seq_length , count , max_nb_value , nb_parameter , + *change_point = NULL , *inf_bound_parameter , *frequency , *seq_index_parameter; + double sum , factorial_sum , binomial_coeff_sum , proba , mean , diff , index_parameter_mean , + index_parameter_diff , index_parameter_sum , shifted_diff , segmentation_likelihood , + segment_penalty , penalized_likelihood , **rank , **seq_mean; + long double index_parameter_square_sum , square_sum , mix_square_sum , shifted_square_sum , + autocovariance , **residual; + FrequencyDistribution *marginal; + Sequences *iseq , *seq , *oseq; + + + oseq = NULL; + error.init(); + +/* if (((index_param_type == TIME) 
&& (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + if (index_param_type == POSITION) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]); + } */ + + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == POISSON_CHANGE) || + (model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE)) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (((model_type[i] != NEGATIVE_BINOMIAL_0_CHANGE) && (min_value[i] < 0)) || + ((model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 1))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else if (model_type[i] == CATEGORICAL_CHANGE) { + if ((marginal_distribution[i]->nb_value < 2) || + (marginal_distribution[i]->nb_value > NB_OUTPUT)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE]; + 
error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + + if (((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == ORDINAL_GAUSSIAN_CHANGE)) && + ((output == SUBTRACTION_RESIDUAL) || (output == ABSOLUTE_RESIDUAL) || (output == DIVISION_RESIDUAL))) { + status = false; + error.update(SEQ_error[SEQR_FORBIDDEN_OUTPUT]); + } + } + + else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (index_param_type != IMPLICIT_TYPE) && (index_interval->variance > 0.)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + if (((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || + (model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (output == SEQUENCE_SAMPLE)) { + status = false; + error.update(SEQ_error[SEQR_FORBIDDEN_OUTPUT]); + } + } + + if (iidentifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (iidentifier == identifier[i]) { + index = i; + break; + } + } + + if (i == nb_sequence) { + status = false; + 
error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + } + + else { + index = I_DEFAULT; + if (length_distribution->variance > 0.) { + status = false; + error.update(SEQ_error[SEQR_VARIABLE_SEQUENCE_LENGTH]); + } + } + + if (((index != I_DEFAULT) || (!common_contrast)) && (output == SEQUENCE_SAMPLE)) { + status = false; + error.update(SEQ_error[SEQR_FORBIDDEN_OUTPUT]); + } + + if (status) { + segmentation_index = (index == I_DEFAULT ? 0 : index); + seq_length = length[segmentation_index]; + + if ((nb_segment < 1) || (nb_segment > seq_length / 2)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEGMENT]); + } + + else { + change_point = new int[nb_segment + 1]; + + if (index_parameter) { + change_point[0] = index_parameter[segmentation_index][0]; + change_point[nb_segment] = index_parameter[segmentation_index][seq_length - 1] + 1; + } + else { + change_point[0] = 0; + change_point[nb_segment] = seq_length; + } + + for (i = 1;i < nb_segment;i++) { + change_point[i] = ichange_point[i - 1]; + } + + for (i = 1;i < nb_segment - 1;i++) { + if (change_point[i] >= change_point[i + 1]) { + status = false; + error.update(SEQ_error[SEQR_CHANGE_POINT]); + } + } + + if (index_parameter) { + change_point[0] = 0; + i = 1; + for (j = 1;j < seq_length;j++) { + if (index_parameter[segmentation_index][j] == change_point[i]) { + change_point[i++] = j; + } + } + + if (i < nb_segment) { + status = false; + error.update(SEQ_error[SEQR_CHANGE_POINT]); + } + else { + change_point[nb_segment] = seq_length; + } + } + } + } + + if (status) { + max_nb_value = 0; + inf_bound_parameter = new int[nb_variable]; + seq_mean = new double*[nb_variable]; + seq_index_parameter = NULL; + rank = new double*[nb_variable]; + residual = NULL; + + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == CATEGORICAL_CHANGE) && (marginal_distribution[i]->nb_value > max_nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + + if ((model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || 
(model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i] = 1; + break; + } + } + + // computation of sequence means for Gaussian change in the variance model or + // stationary piecewise autoregressive models + + if ((model_type[i] == VARIANCE_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + // rank computation for ordinal variables + + if (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) { + rank[i] = marginal_distribution[i]->rank_computation(); + } + else { + rank[i] = NULL; + } + + if (((i == 0) && (model_type[0] == INTERCEPT_SLOPE_CHANGE)) || + ((model_type[i] == LINEAR_MODEL_CHANGE) && (!seq_index_parameter))) { + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + 
seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + } + + if (((i == 0) && ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE))) || + (((model_type[i] == GAUSSIAN_CHANGE) || (model_type[i] == VARIANCE_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == LINEAR_MODEL_CHANGE) || + (model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && (!residual))) { + residual = new long double*[nb_sequence]; + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + residual[j] = new long double[nb_segment]; + } + else { + residual[j] = NULL; + } + } + } + else { + residual[0] = new long double[nb_segment]; + } + } + } + + if (max_nb_value > 0) { + frequency = new int[max_nb_value]; + } + else { + frequency = NULL; + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + segmentation_likelihood = 0.; + } + + for (i = 0;i < nb_variable;i++) { + + // computation of segment log-likelihoods + + if (model_type[i] == CATEGORICAL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + for (m = 0;m < marginal_distribution[i]->nb_value;m++) { + frequency[m] = 0; + } + + for (m = change_point[k];m < change_point[k + 1];m++) { + frequency[int_sequence[j][i][m]]++; + } + + for (m = 0;m < marginal_distribution[i]->nb_value;m++) { + if (frequency[m] > 0) { + segmentation_likelihood += frequency[m] * log((double)frequency[m] / + (double)(change_point[k + 1] - change_point[k])); + } + } + } + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + frequency[k] = 0; + } + + for (k = change_point[j];k < change_point[j + 1];k++) { 
+ for (m = 0;m < nb_sequence;m++) { + frequency[int_sequence[m][i][k]]++; + } + } + + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + if (frequency[k] > 0) { + segmentation_likelihood += frequency[k] * log((double)frequency[k] / + (double)(nb_sequence * (change_point[j + 1] - change_point[j]))); + } + } + } + } + } + + else if (model_type[i] == POISSON_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + sum = 0.; + factorial_sum = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + sum += int_sequence[j][i][m]; + factorial_sum += log_factorial(int_sequence[j][i][m]); + } + + if (sum > 0.) { + segmentation_likelihood += sum * (log(sum / (change_point[k + 1] - change_point[k])) - 1) - + factorial_sum; + } + } + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + sum = 0.; + factorial_sum = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + sum += int_sequence[m][i][k]; + factorial_sum += log_factorial(int_sequence[m][i][k]); + } + } + + if (sum > 0.) 
{ + segmentation_likelihood += sum * (log(sum / (nb_sequence * (change_point[j + 1] - change_point[j]))) - 1) - + factorial_sum; + } + } + } + } + + else if ((model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + sum = 0.; + binomial_coeff_sum = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + sum += int_sequence[j][i][m]; + binomial_coeff_sum += log_binomial_coefficient(inf_bound_parameter[i] , shape_parameter[i] , + int_sequence[j][i][m]); + } + + if (sum > inf_bound_parameter[i] * (change_point[k + 1] - change_point[k])) { + proba = shape_parameter[i] * (change_point[k + 1] - change_point[k]) / + ((shape_parameter[i] - inf_bound_parameter[i]) * (change_point[k + 1] - change_point[k]) + sum); + segmentation_likelihood += binomial_coeff_sum + shape_parameter[i] * (change_point[k + 1] - change_point[k]) * log(proba) + + (sum - inf_bound_parameter[i] * (change_point[k + 1] - change_point[k])) * log(1. 
- proba); + } + else { + segmentation_likelihood = D_INF; + break; + } + } + } + + if (segmentation_likelihood == D_INF) { + break; + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + sum = 0.; + binomial_coeff_sum = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + sum += int_sequence[m][i][k]; + binomial_coeff_sum += log_binomial_coefficient(inf_bound_parameter[i] , shape_parameter[i] , + int_sequence[m][i][k]); + } + } + + if (sum > inf_bound_parameter[i] * nb_sequence * (change_point[j + 1] - change_point[j])) { + proba = shape_parameter[i] * nb_sequence * (change_point[j + 1] - change_point[j]) / + ((shape_parameter[i] - inf_bound_parameter[i]) * nb_sequence * (change_point[j + 1] - change_point[j]) + sum); + segmentation_likelihood += binomial_coeff_sum + shape_parameter[i] * nb_sequence * (change_point[j + 1] - change_point[j]) * log(proba) + + (sum - inf_bound_parameter[i] * nb_sequence * (change_point[j + 1] - change_point[j])) * log(1. 
- proba); + } + else { + segmentation_likelihood = D_INF; + break; + } + } + } + } + + else if ((model_type[i] == GAUSSIAN_CHANGE)|| (model_type[0] == MEAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +/* residual[j][k] = 0.; + sum = int_sequence[j][i][change_point[k]]; + + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = int_sequence[j][i][m] - sum / (m - change_point[k]); + residual[j][k] += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + diff * diff; + sum += int_sequence[j][i][m]; + } */ + + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + mean += int_sequence[j][i][m]; + } + mean /= (change_point[k + 1] - change_point[k]); + + residual[j][k] = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + diff = int_sequence[j][i][m] - mean; + residual[j][k] += diff * diff; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +/* residual[j][k] = 0.; + sum = real_sequence[j][i][change_point[k]]; + + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = real_sequence[j][i][m] - sum / (m - change_point[k]); + residual[j][k] += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + diff * diff; + sum += real_sequence[j][i][m]; + } */ + + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + mean += real_sequence[j][i][m]; + } + mean /= (change_point[k + 1] - change_point[k]); + + residual[j][k] = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + diff = real_sequence[j][i][m] - mean; + residual[j][k] += diff * diff; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_segment;j++) { +/* residual[0][j] = 0.; + sum = 0.; + count = 0; + + for (k = 
change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + if (count > 0) { + diff = int_sequence[m][i][k] - sum / count; + residual[0][j] += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += int_sequence[m][i][k]; + } + } */ + + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + mean += int_sequence[m][i][k]; + } + } + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + residual[0][j] = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = int_sequence[m][i][k] - mean; + residual[0][j] += diff * diff; + } + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { +/* residual[0][j] = 0.; + sum = 0.; + count = 0; + + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + if (count > 0) { + diff = real_sequence[m][i][k] - sum / count; + residual[0][j] += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += real_sequence[m][i][k]; + } + } */ + + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + mean += real_sequence[m][i][k]; + } + } + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + residual[0][j] = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = real_sequence[m][i][k] - mean; + residual[0][j] += diff * diff; + } + } + } + } + } + } + + else if (model_type[i] == VARIANCE_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + residual[j][k] = 0.; + + for (m = change_point[k];m < change_point[k + 1];m++) { + diff = int_sequence[j][i][m] - seq_mean[i][j]; + residual[j][k] += diff * diff; + } + } + } + } + } + + else { + for (j = 0;j < 
nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + residual[j][k] = 0.; + + for (m = change_point[k];m < change_point[k + 1];m++) { + diff = real_sequence[j][i][m] - seq_mean[i][j]; + residual[j][k] += diff * diff; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_segment;j++) { + residual[0][j] = 0.; + + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = int_sequence[m][i][k] - seq_mean[i][0]; + residual[0][j] += diff * diff; + } + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + residual[0][j] = 0.; + + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = real_sequence[m][i][k] - seq_mean[i][0]; + residual[0][j] += diff * diff; + } + } + } + } + } + } + + else if (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +/* residual[j][k] = 0.; + sum = rank[i][int_sequence[j][i][change_point[k]]]; + + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = rank[i][int_sequence[j][i][m]] - sum / (m - change_point[k]); + residual[j][k] += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + diff * diff; + sum += rank[i][int_sequence[j][i][m]]; + } */ + + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + mean += rank[i][int_sequence[j][i][m]]; + } + mean /= (change_point[k + 1] - change_point[k]); + + residual[j][k] = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + diff = rank[i][int_sequence[j][i][m]] - mean; + residual[j][k] += diff * diff; + } + + if (residual[j][k] == 0.) 
{ + residual[j][k] = (change_point[k + 1] - change_point[k]) * MIN_RANK_SQUARE_SUM; + } + } + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { +/* residual[0][j] = 0.; + sum = 0.; + count = 0; + + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + if (count > 0) { + diff = rank[i][int_sequence[m][i][k]] - sum / count; + residual[0][j] += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += rank[i][int_sequence[m][i][k]]; + } + } */ + + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + mean += rank[i][int_sequence[m][i][k]]; + } + } + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + residual[0][j] = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = rank[i][int_sequence[m][i][k]] - mean; + residual[0][j] += diff * diff; + } + } + + if (residual[0][j] == 0.) { + residual[0][j] = nb_sequence * (change_point[k + 1] - change_point[k]) * MIN_RANK_SQUARE_SUM; + } + } + } + } + + else if ((model_type[i] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[change_point[k]]; + sum = int_sequence[j][i][change_point[k]]; + + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + index_parameter_diff = seq_index_parameter[m] - index_parameter_sum / (m - change_point[k]); + index_parameter_square_sum += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[j][i][m] - sum / (m - change_point[k]); + square_sum += ((double)(m - change_point[k]) / 
(double)(m - change_point[k] + 1)) * + diff * diff; + mix_square_sum += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[m]; + sum += int_sequence[j][i][m]; + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + index_parameter_mean += seq_index_parameter[m]; + mean += int_sequence[j][i][m]; + } + index_parameter_mean /= (change_point[k + 1] - change_point[k]); + mean /= (change_point[k + 1] - change_point[k]); + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + index_parameter_diff = seq_index_parameter[m] - index_parameter_mean; + diff = int_sequence[j][i][m] - mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + square_sum += diff * diff; + mix_square_sum += index_parameter_diff * diff; + } + + if ((change_point[k + 1] - change_point[k] > 2) && (index_parameter_square_sum > 0.)) { + residual[j][k] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j][k] = 0.; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[change_point[k]]; + sum = real_sequence[j][i][change_point[k]]; + + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + index_parameter_diff = seq_index_parameter[m] - index_parameter_sum / (m - change_point[k]); + index_parameter_square_sum += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[j][i][m] - sum / (m - change_point[k]); + square_sum += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + diff * 
diff; + mix_square_sum += ((double)(m - change_point[k]) / (double)(m - change_point[k] + 1)) * + index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[m]; + sum += real_sequence[j][i][m]; + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + index_parameter_mean += seq_index_parameter[m]; + mean += real_sequence[j][i][m]; + } + index_parameter_mean /= (change_point[k + 1] - change_point[k]); + mean /= (change_point[k + 1] - change_point[k]); + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + index_parameter_diff = seq_index_parameter[m] - index_parameter_mean; + diff = real_sequence[j][i][m] - mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + square_sum += diff * diff; + mix_square_sum += index_parameter_diff * diff; + } + + if ((change_point[k + 1] - change_point[k] > 2) && (index_parameter_square_sum > 0.)) { + residual[j][k] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j][k] = 0.; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_segment;j++) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + count = 1; + + index_parameter_sum = nb_sequence * seq_index_parameter[change_point[j]]; + sum = int_sequence[0][i][change_point[j]]; + for (k = 1;k < nb_sequence;k++) { + diff = int_sequence[k][i][change_point[j]] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += int_sequence[k][i][change_point[j]]; + } + + for (k = change_point[j] + 1;k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + 
diff = int_sequence[m][i][k] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[k]; + sum += int_sequence[m][i][k]; + } + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + index_parameter_mean += seq_index_parameter[k]; + for (m = 0;m < nb_sequence;m++) { + mean += int_sequence[m][i][k]; + } + } + index_parameter_mean /= (change_point[j + 1] - change_point[j]); + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + for (m = 0;m < nb_sequence;m++) { + diff = int_sequence[m][i][k] - mean; + square_sum += diff * diff; + mix_square_sum += index_parameter_diff * diff; + } + } + index_parameter_square_sum *= nb_sequence; + + if (index_parameter_square_sum > 0.) 
{ + residual[0][j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0][j] = 0.; + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { +/* index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + count = 1; + + index_parameter_sum = nb_sequence * seq_index_parameter[change_point[j]]; + sum = real_sequence[0][i][change_point[j]]; + for (k = 1;k < nb_sequence;k++) { + diff = real_sequence[k][i][change_point[j]] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += real_sequence[k][i][change_point[j]]; + } + + for (k = change_point[j] + 1;k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[m][i][k] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[k]; + sum += real_sequence[m][i][k]; + } + } */ + + index_parameter_mean = 0.; + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + index_parameter_mean += seq_index_parameter[k]; + for (m = 0;m < nb_sequence;m++) { + mean += real_sequence[m][i][k]; + } + } + index_parameter_mean /= (change_point[j + 1] - change_point[j]); + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_mean; + index_parameter_square_sum += index_parameter_diff * index_parameter_diff; + for (m = 0;m < nb_sequence;m++) { + diff = real_sequence[m][i][k] - mean; + square_sum += diff * diff; + 
mix_square_sum += index_parameter_diff * diff; + } + } + index_parameter_square_sum *= nb_sequence; + + if (index_parameter_square_sum > 0.) { + residual[0][j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0][j] = 0.; + } + } + } + } + } + + else if (model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + mean += int_sequence[j][i][m]; + } + mean /= (change_point[k + 1] - change_point[k]); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = int_sequence[j][i][m] - mean; + shifted_diff = int_sequence[j][i][m - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + if (change_point[k + 1] - change_point[k] > 2) { + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + else { + residual[j][k] = 0.; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + mean = 0.; + for (m = change_point[k];m < change_point[k + 1];m++) { + mean += real_sequence[j][i][m]; + } + mean /= (change_point[k + 1] - change_point[k]); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = real_sequence[j][i][m] - mean; + shifted_diff = real_sequence[j][i][m - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + if (change_point[k + 1] - change_point[k] > 2) { + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) { + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + else { + residual[j][k] = 0.; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_segment;j++) { + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + mean += int_sequence[m][i][k]; + } + } + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (k = change_point[j] + 1;k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = int_sequence[m][i][k] - mean; + shifted_diff = int_sequence[m][i][k - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + mean = 0.; + for (k = change_point[j];k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + mean += real_sequence[m][i][k]; + } + } + mean /= nb_sequence * (change_point[j + 1] - change_point[j]); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (k = change_point[j] + 1;k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = real_sequence[m][i][k] - mean; + shifted_diff = real_sequence[m][i][k - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + + else if (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = int_sequence[j][i][m] - seq_mean[i][j]; + shifted_diff = int_sequence[j][i][m - 1] - seq_mean[i][j]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + if (change_point[k + 1] - change_point[k] > 2) { + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + else { + residual[j][k] = 0.; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (m = change_point[k] + 1;m < change_point[k + 1];m++) { + diff = real_sequence[j][i][m] - seq_mean[i][j]; + shifted_diff = real_sequence[j][i][m - 1] - seq_mean[i][j]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + if (change_point[k + 1] - change_point[k] > 2) { + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) { + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + else { + residual[j][k] = 0.; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_segment;j++) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (k = change_point[j] + 1;k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = int_sequence[m][i][k] - seq_mean[i][0]; + shifted_diff = int_sequence[m][i][k - 1] - seq_mean[i][0]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (k = change_point[j] + 1;k < change_point[j + 1];k++) { + for (m = 0;m < nb_sequence;m++) { + diff = real_sequence[m][i][k] - seq_mean[i][0]; + shifted_diff = real_sequence[m][i][k - 1] - seq_mean[i][0]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + if ((model_type[i] == GAUSSIAN_CHANGE) || (model_type[i] == VARIANCE_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == LINEAR_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +// if (residual[j][k] > 0.) { + if (residual[j][k] > (change_point[k + 1] - change_point[k]) * ROUNDOFF_ERROR) { + segmentation_likelihood -= ((double)(change_point[k + 1] - change_point[k]) / 2.) * (logl(residual[j][k] / + (change_point[k + 1] - change_point[k])) + log(2 * M_PI) + 1); +/* segmentation_likelihood -= ((double)(change_point[k + 1] - change_point[k]) / 2.) * (logl(residual[j][k] / + (change_point[k + 1] - change_point[k])) + log(2 * M_PI)) + + (double)(change_point[k + 1] - change_point[k]) / 2.; */ + } + else { + segmentation_likelihood = D_INF; + break; + } + } + } + + if (segmentation_likelihood == D_INF) { + break; + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { +// if (residual[0][j] > 0.) { + if (residual[0][j] > nb_sequence * (change_point[j + 1] - change_point[j]) * ROUNDOFF_ERROR) { + segmentation_likelihood -= ((double)(nb_sequence * (change_point[j + 1] - change_point[j])) / 2.) 
* (logl(residual[0][j] / + (nb_sequence * (change_point[j + 1] - change_point[j]))) + log(2 * M_PI) + 1); +/* segmentation_likelihood -= ((double)(nb_sequence * (change_point[j + 1] - change_point[j])) / 2.) * (logl(residual[0][j] / + (nb_sequence * (change_point[j + 1] - change_point[j]))) + log(2 * M_PI)) + + (double)(nb_sequence * (change_point[j + 1] - change_point[j])) / 2.; */ + } + else { + segmentation_likelihood = D_INF; + break; + } + } + } + } + + else if ((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < nb_segment;k++) { +// if (residual[j][k] > 0.) { + if (residual[j][k] > (change_point[k + 1] - change_point[k] - 1) * ROUNDOFF_ERROR) { + segmentation_likelihood -= ((double)(change_point[k + 1] - change_point[k] - 1) / 2.) * (logl(residual[j][k] / + (change_point[k + 1] - change_point[k] - 1)) + log(2 * M_PI) + 1); + } + else { + segmentation_likelihood = D_INF; + break; + } + } + } + + if (segmentation_likelihood == D_INF) { + break; + } + } + } + + else { + for (j = 0;j < nb_segment;j++) { +// if (residual[0][j] > 0.) { + if (residual[0][j] > nb_sequence * (change_point[j + 1] - change_point[j] - 1) * ROUNDOFF_ERROR) { + segmentation_likelihood -= ((double)(nb_sequence * (change_point[j + 1] - change_point[j] - 1)) / 2.) * (logl(residual[0][j] / + (nb_sequence * (change_point[j + 1] - change_point[j] - 1))) + log(2 * M_PI) + 1); + } + else { + segmentation_likelihood = D_INF; + break; + } + } + } + } + + if (segmentation_likelihood == D_INF) { + break; + } + } + + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if (index != I_DEFAULT) { + segmentation_likelihood = 0.; + + for (i = 1;i < nb_segment;i++) { + residual[index][0] += residual[index][i]; + } + +// if (residual[index][0] > 0.) 
{ + if (residual[index][0] > length[index] * ROUNDOFF_ERROR) { + segmentation_likelihood = -((double)length[index] / 2.) * (logl(residual[index][0] / length[index]) + + log(2 * M_PI) + 1); +/* segmentation_likelihood = -((double)length[index] / 2.) * (logl(residual[index][0] / (length[index] - nb_segment)) + + log(2 * M_PI)) - (double)(length[index] - nb_segment) / 2.; */ + } + else { + segmentation_likelihood = D_INF; + } + } + + else { + if (!common_contrast) { + for (i = 0;i < nb_segment;i++) { + for (j = 1;j < nb_sequence;j++) { + residual[0][i] += residual[j][i]; + } + } + } + + for (i = 1;i < nb_segment;i++) { + residual[0][0] += residual[0][i]; + } + +// if (residual[0][0] > 0.) { + if (residual[0][0] > nb_sequence * length[0] * ROUNDOFF_ERROR) { + segmentation_likelihood = -((double)(nb_sequence * length[0]) / 2.) * + (logl(residual[0][0] / (nb_sequence * length[0])) + log(2 * M_PI) + 1); +/* segmentation_likelihood = -((double)(nb_sequence * length[0]) / 2.) * + (logl(residual[0][0] / (nb_sequence * (length[0] - nb_segment))) + log(2 * M_PI)) - + (double)(nb_sequence * (length[0] - nb_segment)) / 2.; */ + } + else { + segmentation_likelihood = D_INF; + } + } + } + + if (segmentation_likelihood != D_INF) { + if (index != I_DEFAULT) { + iseq = new Sequences(*this , 1 , &index); + seq = new Sequences(*iseq , ADD_STATE_VARIABLE); + delete iseq; + } + else { + seq = new Sequences(*this , ADD_STATE_VARIABLE); + } + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < nb_segment;j++) { + for (k = change_point[j];k < change_point[j + 1];k++) { + seq->int_sequence[i][0][k] = j; + } + } + } + + seq->min_value[0] = 0; + seq->max_value[0] = nb_segment - 1; + + seq->build_marginal_frequency_distribution(0); + + if (os) { + segment_penalty = 0.; + for (i = 0;i < nb_segment;i++) { + segment_penalty += log((double)(change_point[i + 1] - change_point[i])); + } + + nb_parameter = seq->nb_parameter_computation((index == I_DEFAULT ? 
index : 0) , nb_segment , model_type , + common_contrast); + + penalized_likelihood = 2 * segmentation_likelihood - nb_parameter * + log((double)((seq->nb_variable - 1) * seq->length[0])) - segment_penalty; + + *os << "\n" << nb_segment << " " << (nb_segment == 1 ? SEQ_label[SEQL_SEGMENT] : SEQ_label[SEQL_SEGMENTS]) + << " 2 * " << STAT_label[STATL_LIKELIHOOD] << ": " << 2 * segmentation_likelihood << " " + << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (Modified " << STAT_criterion_word[BIC] << "): " + << penalized_likelihood << endl; + } + + oseq = seq->segmentation_output(nb_segment , model_type , common_contrast , os , output , + change_point , continuity); + + if ((output == SEQUENCE) || (output == ABSOLUTE_RESIDUAL)) { + delete seq; + } + } + + else { + oseq = NULL; + error.update(SEQ_error[SEQR_SEGMENTATION_FAILURE]); + } + + for (i = 0;i < nb_variable;i++) { + delete [] seq_mean[i]; + delete [] rank[i]; + } + delete [] seq_mean; + delete [] rank; + + delete [] frequency; + + if (residual) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { + delete [] residual[i]; + } + } + else { + delete [] residual[0]; + } + delete [] residual; + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + } + + delete [] change_point; + + return oseq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Segmentation of a single sequence or a sample of sequences. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the segmentation, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] ichange_point change points, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output (sequence or residuals), + * \param[in] continuity flag continuous piecewise linear function. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation(StatError &error , ostream *os , int iidentifier , + int nb_segment , vector &ichange_point , vector &model_type , + bool common_contrast , vector &shape_parameter , + sequence_type output , bool continuity) const + +{ + return segmentation(error , os , iidentifier , nb_segment , ichange_point.data() , model_type.data() , + common_contrast , shape_parameter.data() , output , continuity); +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/change_points2.cpp b/src/cpp/sequence_analysis/change_points2.cpp new file mode 100644 index 0000000..8a07273 --- /dev/null +++ b/src/cpp/sequence_analysis/change_points2.cpp @@ -0,0 +1,3050 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: change_points2.cpp 18669 2015-11-09 12:08:08Z guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it 
under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + +extern double log_factorial(int value); +extern double log_binomial_coefficient(int inf_bound , double parameter , int value); + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the contrast functions within a forward recursion. 
+ * + * \param[in] time time instant, + * \param[in] index sequence index, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] factorial log factorial for Poisson models, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] binomial_coeff log binomial coefficients for negative binomial models, + * \param[in] seq_mean sequence means for Gaussian change in the variance models or + * stationary piecewise autoregressive models, + * \param[in] seq_index_parameter index parameters, + * \param[in] hyperparam hyperparameters for Bayesian models, + * \param[in] rank ranks for ordinal variables, + * \param[in] contrast contrast functions, + * \param[in] nb_segment number of segments for bounding time loops. + */ +/*--------------------------------------------------------------*/ + /* Implementation notes: + * - For each variable i in 1..nb_variable-1, model_type[i - 1] selects the quantity accumulated into contrast[k] for k going from time (or time - 1) down to nb_segment, i.e. for the candidate segment k..time. + * - When (index != I_DEFAULT) or common_contrast is false, sequences are processed one by one (only sequence index unless index == I_DEFAULT); otherwise the nb_sequence sequences are pooled into a single sample and only residual[0] is used. + * - contrast[nb_segment..time] is reset to 0 and then written, so it is effectively an output although it is documented as [in]. + * - Updates in the likelihood branches are guarded by a test against D_INF, so an entry set to D_INF stays there; the MEAN_CHANGE / INTERCEPT_SLOPE_CHANGE path subtracts residuals without that guard. + * - NOTE(review): the BAYESIAN_* branches subscript the sequences with index directly and ignore common_contrast, so they presumably require index != I_DEFAULT -- confirm with callers. + */ +void Sequences::forward_contrast(int time , int index , segment_model *model_type , bool common_contrast , + double ***factorial , double *shape_parameter , double ***binomial_coeff , + double **seq_mean , int *seq_index_parameter , double **hyperparam , + double **rank , long double *contrast , int nb_segment) const + +{ + int i , j , k , m; + int max_nb_value , count , *frequency , *inf_bound_parameter; + double sum , factorial_sum , proba , binomial_coeff_sum , diff , index_parameter_sum , + index_parameter_diff , shifted_diff , range_diff , mean , buff; + long double index_parameter_square_sum , square_sum , mix_square_sum , shifted_square_sum , + autocovariance , prior_contrast , square_sum_term[3] , **residual; + + + // initializations + + max_nb_value = 0; + inf_bound_parameter = new int[nb_variable]; + residual = NULL; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == CATEGORICAL_CHANGE) && (marginal_distribution[i]->nb_value > max_nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + + if ((model_type[i - 1] == 
NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + } + /* scratch rows for residual sums: allocated once, by the first variable that needs them; one row of time + 1 entries per processed sequence, or the single row residual[0] in the pooled case */ + if (((i == 1) && ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE))) || + (((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && (!residual))) { + residual = new long double*[MAX(nb_sequence , 2)]; + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + residual[j] = new long double[time + 1]; + } + else { + residual[j] = NULL; + } + } + } + else { + residual[0] = new long double[time + 1]; + } + } + } + + if (max_nb_value > 0) { + frequency = new int[max_nb_value]; + } + else { + frequency = NULL; + } + + // computation of segment contrast functions (log-likelihoods or sum of squared deviations) + + for (i = nb_segment;i <= time;i++) { + contrast[i] = 0.; + } + + for (i = 1;i < nb_variable;i++) { + if (model_type[i - 1] == CATEGORICAL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + frequency[k] = 0; + } + + frequency[int_sequence[j][i][time]]++; + +# ifdef MESSAGE + sum = 0.; +# endif + + for (k = time - 1;k >= nb_segment;k--) { + +# ifdef MESSAGE + sum += (time - k) * log((double)(time - k) / (double)(time - k + 1)) + + log((double)(frequency[int_sequence[j][i][k]] + 1) / (double)(time - k + 1)); + if (frequency[int_sequence[j][i][k]] > 0) { + sum -= 
frequency[int_sequence[j][i][k]] * + log((double)frequency[int_sequence[j][i][k]] / (double)(frequency[int_sequence[j][i][k]] + 1)); + } +/* frequency[int_sequence[j][i][k]]++; + + if (contrast[k] != D_INF) { + contrast[k] += sum; + } */ +# endif + + frequency[int_sequence[j][i][k]]++; + if (contrast[k] != D_INF) { + buff = 0.; + for (m = 0;m < marginal_distribution[i]->nb_value;m++) { + if (frequency[m] > 0) { +// contrast[k] += frequency[m] * log((double)frequency[m] / (double)(time - k + 1)); + buff += frequency[m] * log((double)frequency[m] / (double)(time - k + 1)); + } + } + contrast[k] += buff; + +# ifdef MESSAGE + if ((buff < sum - DOUBLE_ERROR) || (buff > sum + DOUBLE_ERROR)) { + cout << "\nERROR: " << k << " " << time << " " << j << " | " << buff << " " << sum << endl; + } +# endif + + } + } + } + } + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + frequency[j] = 0; + } + + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + frequency[int_sequence[k][i][j]]++; + } + + if (contrast[j] != D_INF) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + if (frequency[k] > 0) { + contrast[j] += frequency[k] * log((double)frequency[k] / (double)(nb_sequence * (time - j + 1))); + } + } + } + } + } + } + /* Poisson: adds sum * (log(sum / n) - 1) - factorial_sum, n being the number of observations in the segment; nothing is added while sum == 0 */ + else if (model_type[i - 1] == POISSON_CHANGE) { +/* for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j][time] = log_factorial(int_sequence[j][i][time]); + } + } */ + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = 0.; + factorial_sum = 0.; + + for (k = time;k >= nb_segment;k--) { + sum += int_sequence[j][i][k]; + factorial_sum += factorial[i][j][k]; + if ((contrast[k] != D_INF) && (sum > 0.)) { + contrast[k] += sum * (log(sum / (time - k + 1)) - 1) - factorial_sum; + } + } + } + } + } + + else { + sum = 0.; + factorial_sum = 0.; + + for (j = time;j >= nb_segment;j--) { 
+ for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + factorial_sum += factorial[i][k][j]; + } + + if ((contrast[j] != D_INF) && (sum > 0.)) { + contrast[j] += sum * (log(sum / (nb_sequence * (time - j + 1))) - 1) - factorial_sum; + } + } + } + } + /* negative binomial with fixed shape_parameter[i - 1]; inf_bound_parameter[i - 1] is the 0 or 1 bound set from the model type above; the segment becomes D_INF when sum does not exceed inf_bound * n */ + else if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { +/* for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j][time] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][time]); + } + } */ + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = 0.; + binomial_coeff_sum = 0.; + + for (k = time;k >= nb_segment;k--) { + sum += int_sequence[j][i][k]; + binomial_coeff_sum += binomial_coeff[i][j][k]; + + if (contrast[k] != D_INF) { + if (sum > inf_bound_parameter[i - 1] * (time - k + 1)) { + proba = shape_parameter[i - 1] * (time - k + 1) / + ((shape_parameter[i - 1] - inf_bound_parameter[i - 1]) * (time - k + 1) + sum); + contrast[k] += binomial_coeff_sum + shape_parameter[i - 1] * (time - k + 1) * log(proba) + + (sum - inf_bound_parameter[i - 1] * (time - k + 1)) * log(1. 
- proba); + } + else { + contrast[k] = D_INF; + } + } + } + } + } + } + + else { + sum = 0.; + binomial_coeff_sum = 0.; + + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + binomial_coeff_sum += binomial_coeff[i][k][j]; + } + + if (contrast[j] != D_INF) { + if (sum > inf_bound_parameter[i - 1] * nb_sequence * (time - j + 1)) { + proba = shape_parameter[i - 1] * nb_sequence * (time - j + 1) / + ((shape_parameter[i - 1] - inf_bound_parameter[i - 1]) * nb_sequence * (time - j + 1) + sum); + contrast[j] += binomial_coeff_sum + shape_parameter[i - 1] * nb_sequence * (time - j + 1) * log(proba) + + (sum - inf_bound_parameter[i - 1] * nb_sequence * (time - j + 1)) * log(1. - proba); + } + else { + contrast[j] = D_INF; + } + } + } + } + } + /* Gaussian / mean change: residual[.][k] receives the sum of squared deviations about the segment mean, updated incrementally as k decreases (the MESSAGE block cross-checks the pooled case) */ + else if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[0] == MEAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + sum = int_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + diff = int_sequence[j][i][k] - sum / (time - k); + square_sum += ((double)(time - k) / (double)(time - k + 1)) * diff * diff; + sum += int_sequence[j][i][k]; + residual[j][k] = square_sum; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + sum = real_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + diff = real_sequence[j][i][k] - sum / (time - k); + square_sum += ((double)(time - k) / (double)(time - k + 1)) * diff * diff; + sum += real_sequence[j][i][k]; + residual[j][k] = square_sum; + } + } + } + } + +# ifdef DEBUG + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + cout << time << " | "; + for (k = time;k >= nb_segment;k--) { + cout << residual[j][k] 
<< " "; + } + cout << endl; + } + } +# endif + + } + + else { + square_sum = 0.; + sum = 0.; + count = 0; + + if (type[i] != REAL_VALUE) { + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = int_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += int_sequence[k][i][j]; + } + residual[0][j] = square_sum; + } + } + + else { + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = real_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += real_sequence[k][i][j]; + } + residual[0][j] = square_sum; + } + } + +# ifdef MESSAGE + + // alternative implementation + + square_sum = 0.; + sum = 0.; + + residual[1] = new long double[length[0]]; + + if (type[i] != REAL_VALUE) { + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + square_sum += int_sequence[k][i][j] * int_sequence[k][i][j]; + } + residual[1][j] = square_sum - sum * sum / (nb_sequence * (time - j + 1)); + } + } + + else { + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + sum += real_sequence[k][i][j]; + square_sum += real_sequence[k][i][j] * real_sequence[k][i][j]; + } + residual[1][j] = square_sum - sum * sum / (nb_sequence * (time - j + 1)); + } + } + + for (j = time;j >= nb_segment;j--) { + if ((residual[1][j] < residual[0][j] - DOUBLE_ERROR) || (residual[1][j] > residual[0][j] + DOUBLE_ERROR)) { + cout << "\nERROR: " << time << " " << j << " | " << residual[1][j] << " " << residual[0][j] << endl; + } + } + + delete [] residual[1]; + +# endif + } + } + /* variance change: squared deviations about the fixed mean seq_mean[i][j] (seq_mean[i][0] when pooled) */ + else if (model_type[i - 1] == VARIANCE_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + for (k 
= time;k >= nb_segment;k--) { + diff = int_sequence[j][i][k] - seq_mean[i][j]; + square_sum += diff * diff; + residual[j][k] = square_sum; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + for (k = time;k >= nb_segment;k--) { + diff = real_sequence[j][i][k] - seq_mean[i][j]; + square_sum += diff * diff; + residual[j][k] = square_sum; + } + } + } + } + } + + else { + square_sum = 0.; + + if (type[i] != REAL_VALUE) { + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j] - seq_mean[i][0]; + square_sum += diff * diff; + } + residual[0][j] = square_sum; + } + } + + else { + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j] - seq_mean[i][0]; + square_sum += diff * diff; + } + residual[0][j] = square_sum; + } + } + } + } + /* ordinal Gaussian: same recursion as the Gaussian case applied to rank[i][value]; zero residuals computed in the loops are replaced by n * MIN_RANK_SQUARE_SUM */ + else if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + sum = rank[i][int_sequence[j][i][time]]; + residual[j][time] = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + diff = rank[i][int_sequence[j][i][k]] - sum / (time - k); + square_sum += ((double)(time - k) / (double)(time - k + 1)) * diff * diff; + sum += rank[i][int_sequence[j][i][k]]; + residual[j][k] = square_sum; + + if (residual[j][k] == 0.) { + residual[j][k] = (time - k + 1) * MIN_RANK_SQUARE_SUM; + } + } + } + } + } + + else { + square_sum = 0.; + sum = 0.; + count = 0; + + for (j = time;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = rank[i][int_sequence[k][i][j]] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += rank[i][int_sequence[k][i][j]]; + } + residual[0][j] = square_sum; + + if (residual[0][j] == 0.) 
{ + residual[0][j] = count * MIN_RANK_SQUARE_SUM; + } + } + } + } + /* linear model / intercept-slope change: residual = square_sum - mix_square_sum^2 / index_parameter_square_sum, the index parameter sums being built from seq_index_parameter; 0 when index_parameter_square_sum is not positive */ + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[time]; + sum = int_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / (time - k); + index_parameter_square_sum += ((double)(time - k) / (double)(time - k + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[j][i][k] - sum / (time - k); + square_sum += ((double)(time - k) / (double)(time - k + 1)) * diff * diff; + mix_square_sum += ((double)(time - k) / (double)(time - k + 1)) * index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[k]; + sum += int_sequence[j][i][k]; + + if ((k < time - 1) && (index_parameter_square_sum > 0.)) { + residual[j][k] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j][k] = 0.; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[time]; + sum = real_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / (time - k); + index_parameter_square_sum += ((double)(time - k) / (double)(time - k + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[j][i][k] - sum / (time - k); + square_sum += ((double)(time - k) / (double)(time - k + 1)) * diff * diff; + mix_square_sum += ((double)(time 
- k) / (double)(time - k + 1)) * index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[k]; + sum += real_sequence[j][i][k]; + + if ((k < time - 1) && (index_parameter_square_sum > 0.)) { + residual[j][k] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j][k] = 0.; + } + } + } + } + } + } + + else { + index_parameter_square_sum = 0.; + index_parameter_sum = nb_sequence * seq_index_parameter[time]; + square_sum = 0.; + mix_square_sum = 0.; + count = 1; + residual[0][time] = 0.; + + if (type[i] != REAL_VALUE) { + sum = int_sequence[0][i][time]; + for (j = 1;j < nb_sequence;j++) { + diff = int_sequence[j][i][time] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += int_sequence[j][i][time]; + } + + for (j = time - 1;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[j]; + sum += int_sequence[k][i][j]; + } + + if (index_parameter_square_sum > 0.) 
{ + residual[0][j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0][j] = 0.; + } + } + } + + else { + sum = real_sequence[0][i][time]; + for (j = 1;j < nb_sequence;j++) { + diff = real_sequence[j][i][time] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += real_sequence[j][i][time]; + } + + for (j = time - 1;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[j]; + sum += real_sequence[k][i][j]; + } + + if (index_parameter_square_sum > 0.) 
{ + residual[0][j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0][j] = 0.; + } + } + } + } + } + /* autoregressive model: residual = square_sum - autocovariance^2 / shifted_square_sum, the three sums being updated incrementally; the per-sequence DEBUG block recomputes them about mean = sum / (time - k + 1) for comparison */ + else if (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = int_sequence[j][i][time]; + +# ifdef DEBUG + if (time == 10) { + cout << "\n"; + } +# endif + + if (time - 1 >= nb_segment) { + diff = int_sequence[j][i][time - 1] - int_sequence[j][i][time]; + square_sum = diff * diff / 4.; + shifted_square_sum = square_sum; + autocovariance = -square_sum; + sum += int_sequence[j][i][time - 1]; + residual[j][time - 1] = 0.; + +# ifdef DEBUG + if (time == 10) { + cout << time - 1 << " " << square_sum << " " << shifted_square_sum << " " << autocovariance << endl; + } +# endif + } + + for (k = time - 2;k >= nb_segment;k--) { + diff = int_sequence[j][i][k + 1] - sum / (time - k); + shifted_diff = int_sequence[j][i][k] - sum / (time - k); + square_sum += diff * diff + + ((double)(time - k) / ((double)(time - k + 1) * (time - k + 1))) * shifted_diff * shifted_diff; + shifted_square_sum += (1. + (double)(time - k) / ((double)(time - k + 1) * (time - k + 1))) * shifted_diff * shifted_diff - + (2. / (double)(time - k + 1)) * shifted_diff * (int_sequence[j][i][k] - int_sequence[j][i][time]); + autocovariance += diff * shifted_diff + + ((double)(time - k) / ((double)(time - k + 1) * (time - k + 1))) * shifted_diff * shifted_diff - + (1. / (double)(time - k + 1)) * shifted_diff * (int_sequence[j][i][k] - int_sequence[j][i][time]); + sum += int_sequence[j][i][k]; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + +# ifdef DEBUG + if (time == 10) { + cout << k << " " << square_sum << " " << shifted_square_sum << " " << autocovariance << endl; + } +# endif + + } + +# ifdef DEBUG + if (time == 10) { + cout << "\n"; + } + + sum = int_sequence[j][i][time]; + + for (k = time - 1;k >= nb_segment;k--) { + sum += int_sequence[j][i][k]; + mean = sum / (time - k + 1); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (m = k + 1;m <= time;m++) { + diff = int_sequence[j][i][m] - mean; + shifted_diff = int_sequence[j][i][m - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + buff = square_sum; + if (shifted_square_sum > 0.) { + buff -= autocovariance * autocovariance / shifted_square_sum; + } + + if (time == 10) { + cout << k << " " << square_sum << " " << shifted_square_sum << " " << autocovariance << " " + << residual[j][k] << " | " << buff << endl; + } + } +# endif + + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = real_sequence[j][i][time]; + + if (time - 1 >= nb_segment) { + diff = real_sequence[j][i][time - 1] - real_sequence[j][i][time]; + square_sum = diff * diff / 4.; + shifted_square_sum = square_sum; + autocovariance = -square_sum; + sum += real_sequence[j][i][time - 1]; + residual[j][time - 1] = 0.; + } + + for (k = time - 2;k >= nb_segment;k--) { + diff = real_sequence[j][i][k + 1] - sum / (time - k); + shifted_diff = real_sequence[j][i][k] - sum / (time - k); + square_sum += diff * diff + + ((double)(time - k) / ((double)(time - k + 1) * (time - k + 1))) * shifted_diff * shifted_diff; + shifted_square_sum += (1. + (double)(time - k) / ((double)(time - k + 1) * (time - k + 1))) * shifted_diff * shifted_diff - + (2. 
/ (double)(time - k + 1)) * shifted_diff * (real_sequence[j][i][k] - real_sequence[j][i][time]); + autocovariance += diff * shifted_diff + + ((double)(time - k) / ((double)(time - k + 1) * (time - k + 1))) * shifted_diff * shifted_diff - + (1. / (double)(time - k + 1)) * shifted_diff * (real_sequence[j][i][k] - real_sequence[j][i][time]); + sum += real_sequence[j][i][k]; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) { + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + sum = 0.; + for (j = 0;j < nb_sequence;j++) { + sum += int_sequence[j][i][time]; + } + + if (time - 1 >= nb_segment) { + for (j = 0;j < nb_sequence;j++) { + sum += int_sequence[j][i][time - 1]; + } + mean = sum / (nb_sequence * 2); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (j = 0;j < nb_sequence;j++) { + diff = int_sequence[j][i][time] - mean; + shifted_diff = int_sequence[j][i][time - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][time - 1] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[0][time - 1] -= autocovariance * autocovariance / shifted_square_sum; + } + } + + for (j = time - 2;j >= nb_segment;j--) { + mean = sum / (nb_sequence * (time - j)); + square_sum_term[0] = 0.; + square_sum_term[1] = 0.; + square_sum_term[2] = 0.; + shifted_diff = 0.; + range_diff = 0.; + + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + diff = int_sequence[k][i][j + 1] - mean; + square_sum_term[0] += diff * diff; + buff = int_sequence[k][i][j] - mean; + shifted_diff += buff; + square_sum_term[1] += buff * buff; + square_sum_term[2] += diff * buff; + range_diff += int_sequence[k][i][j] - int_sequence[k][i][time]; + } + + square_sum += square_sum_term[0] + + ((double)(time - j) / ((double)nb_sequence * (time - j + 1) * (time - j + 1))) * shifted_diff * shifted_diff; + shifted_square_sum += square_sum_term[1] + + ((double)(time - j) / ((double)nb_sequence * (time - j + 1) * (time - j + 1))) * shifted_diff * shifted_diff - + (2. / ((double)nb_sequence * (time - j + 1))) * shifted_diff * range_diff; + autocovariance += square_sum_term[2] + + ((double)(time - j) / ((double)nb_sequence * (time - j + 1) * (time - j + 1))) * shifted_diff * shifted_diff - + (1. / ((double)nb_sequence * (time - j + 1))) * shifted_diff * range_diff; + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + + else { + sum = 0.; + for (j = 0;j < nb_sequence;j++) { + sum += real_sequence[j][i][time]; + } + + if (time - 1 >= nb_segment) { + for (j = 0;j < nb_sequence;j++) { + sum += real_sequence[j][i][time - 1]; + } + mean = sum / (nb_sequence * 2); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (j = 0;j < nb_sequence;j++) { + diff = real_sequence[j][i][time] - mean; + shifted_diff = real_sequence[j][i][time - 1] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][time - 1] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][time - 1] -= autocovariance * autocovariance / shifted_square_sum; + } + } + + for (j = time - 2;j >= nb_segment;j--) { + mean = sum / (nb_sequence * (time - j)); + square_sum_term[0] = 0.; + square_sum_term[1] = 0.; + square_sum_term[2] = 0.; + shifted_diff = 0.; + range_diff = 0.; + + for (k = 0;k < nb_sequence;k++) { + sum += real_sequence[k][i][j]; + diff = real_sequence[k][i][j + 1] - mean; + square_sum_term[0] += diff * diff; + buff = real_sequence[k][i][j] - mean; + shifted_diff += buff; + square_sum_term[1] += buff * buff; + square_sum_term[2] += diff * buff; + range_diff += real_sequence[k][i][j] - real_sequence[k][i][time]; + } + + square_sum += square_sum_term[0] + + ((double)(time - j) / ((double)nb_sequence * (time - j + 1) * (time - j + 1))) * shifted_diff * shifted_diff; + shifted_square_sum += square_sum_term[1] + + ((double)(time - j) / ((double)nb_sequence * (time - j + 1) * (time - j + 1))) * shifted_diff * shifted_diff - + (2. / ((double)nb_sequence * (time - j + 1))) * shifted_diff * range_diff; + autocovariance += square_sum_term[2] + + ((double)(time - j) / ((double)nb_sequence * (time - j + 1) * (time - j + 1))) * shifted_diff * shifted_diff - + (1. 
/ ((double)nb_sequence * (time - j + 1))) * shifted_diff * range_diff; + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + /* stationary autoregressive model: same residual formula, with deviations taken about the fixed mean seq_mean[i][j] (seq_mean[i][0] when pooled) */ + else if (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + diff = int_sequence[j][i][k + 1] - seq_mean[i][j]; + shifted_diff = int_sequence[j][i][k] - seq_mean[i][j]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) { + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + + for (k = time - 1;k >= nb_segment;k--) { + diff = real_sequence[j][i][k + 1] - seq_mean[i][j]; + shifted_diff = real_sequence[j][i][k] - seq_mean[i][j]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + } + + else { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + + if (type[i] != REAL_VALUE) { + for (j = time - 1;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j + 1] - seq_mean[i][0]; + shifted_diff = int_sequence[k][i][j] - seq_mean[i][0]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + + else { + for (j = time - 1;j >= nb_segment;j--) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j + 1] - seq_mean[i][0]; + shifted_diff = real_sequence[k][i][j] - seq_mean[i][0]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + /* Bayesian Poisson: reads hyperparam[i][0] and hyperparam[i][1] and only the sequence designated by index; note that factorial[i][index][time] is written here */ + else if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + prior_contrast = -lgamma(hyperparam[i][0]) + hyperparam[i][0] * log(hyperparam[i][1]); + factorial[i][index][time] = log_factorial(int_sequence[index][i][time]); + + sum = 0.; + factorial_sum = 0.; + for (j = time;j >= nb_segment;j--) { + sum += int_sequence[index][i][j]; + factorial_sum += factorial[i][index][j]; + if (contrast[j] != D_INF) { + contrast[j] += prior_contrast - factorial_sum + lgamma(hyperparam[i][0] + sum) - + (hyperparam[i][0] + sum) * log(hyperparam[i][1] + time - j + 1); + } + } + } + /* Bayesian Gaussian: reads hyperparam[i][0] to hyperparam[i][3] and only the sequence designated by index */ + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + prior_contrast = log(hyperparam[i][1]) / 2 - lgamma(hyperparam[i][2] / 2) + + hyperparam[i][2] * log(hyperparam[i][3] / 2) / 2; + + if (type[i] != REAL_VALUE) { + square_sum = 0.; + sum = int_sequence[index][i][time]; + if (contrast[time] != D_INF) { + diff = hyperparam[i][0] - sum; + contrast[time] += prior_contrast - log(2 * M_PI) / 2 - + log(hyperparam[i][1] + 1) / 2 + lgamma((hyperparam[i][2] + 1) / 2) - + (hyperparam[i][2] + 1) * + log((hyperparam[i][3] + hyperparam[i][1] * + diff * diff / (hyperparam[i][1] + 1)) / 2) / 2; + } + + for (j = time - 1;j >= nb_segment;j--) { + diff = int_sequence[index][i][j] - sum / (time - j); + square_sum += ((double)(time - j) / (double)(time - j + 1)) * diff * diff; + sum += int_sequence[index][i][j]; + if (contrast[j] != D_INF) { + diff = hyperparam[i][0] - sum / (time - j + 1); + contrast[j] += prior_contrast - (time - j + 1) * log(2 * M_PI) / 2 - + log(hyperparam[i][1] + time - j + 1) / 2 + + lgamma((hyperparam[i][2] + time - j + 1) / 2) - + (hyperparam[i][2] + time - j + 1) * + logl((hyperparam[i][3] + square_sum + hyperparam[i][1] * (time - j + 1) * + diff * diff / (hyperparam[i][1] + time - j + 1)) / 2) / 2; + } + } + } + + else { + square_sum = 0.; + sum = real_sequence[index][i][time]; + if 
(contrast[time] != D_INF) { + diff = hyperparam[i][0] - sum; + contrast[time] += prior_contrast - log(2 * M_PI) / 2 - + log(hyperparam[i][1] + 1) / 2 + lgamma((hyperparam[i][2] + 1) / 2) - + (hyperparam[i][2] + 1) * + log((hyperparam[i][3] + hyperparam[i][1] * + diff * diff / (hyperparam[i][1] + 1)) / 2) / 2; + } + + for (j = time - 1;j >= nb_segment;j--) { + diff = real_sequence[index][i][j] - sum / (time - j); + square_sum += ((double)(time - j) / (double)(time - j + 1)) * diff * diff; + sum += real_sequence[index][i][j]; + if (contrast[j] != D_INF) { + diff = hyperparam[i][0] - sum / (time - j + 1); + contrast[j] += prior_contrast - (time - j + 1) * log(2 * M_PI) / 2 - + log(hyperparam[i][1] + time - j + 1) / 2 + + lgamma((hyperparam[i][2] + time - j + 1) / 2) - + (hyperparam[i][2] + time - j + 1) * + logl((hyperparam[i][3] + square_sum + hyperparam[i][1] * (time - j + 1) * + diff * diff / (hyperparam[i][1] + time - j + 1)) / 2) / 2; + } + } + } + } + /* use of the residuals of variable i: subtracted as is when model_type[0] is MEAN_CHANGE or INTERCEPT_SLOPE_CHANGE; otherwise turned into -(n / 2) * (log(residual / n) + log(2 pi) + 1), or D_INF when the residual is not above n * ROUNDOFF_ERROR, n being the number of terms */ + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = time - 1;k >= nb_segment;k--) { + contrast[k] -= residual[j][k]; + } + } + } + } + + else { + for (j = time;j >= nb_segment;j--) { + contrast[j] -= residual[0][j]; + } + } + } + + else if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = time;k >= nb_segment;k--) { + if (contrast[k] != D_INF) { +// if (residual[j][k] > 0.) { + if (residual[j][k] > (time - k + 1) * ROUNDOFF_ERROR) { + contrast[k] -= ((double)(time - k + 1) / 2.) 
* (logl(residual[j][k] / + (time - k + 1)) + log(2 * M_PI) + 1); +/* contrast[k] -= ((double)(time - k + 1) / 2.) * (logl(residual[j][k] / + (time - k)) + log(2 * M_PI)) + (double)(time - k) / 2.; */ + } + else { + contrast[k] = D_INF; + } + } + } + } + } + } + + else { + for (j = time;j >= nb_segment;j--) { + if (contrast[j] != D_INF) { +// if (residual[0][j] > 0.) { + if (residual[0][j] > nb_sequence * (time - j + 1) * ROUNDOFF_ERROR) { + contrast[j] -= ((double)(nb_sequence * (time - j + 1)) / 2.) * (logl(residual[0][j] / + (nb_sequence * (time - j + 1))) + log(2 * M_PI) + 1); + } + else { + contrast[j] = D_INF; + } + } + } + } + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = time - 1;k >= nb_segment;k--) { + if (contrast[k] != D_INF) { +// if (residual[j][k] > 0.) { + if (residual[j][k] > (time - k) * ROUNDOFF_ERROR) { + contrast[k] -= ((double)(time - k) / 2.) * (logl(residual[j][k] / + (time - k)) + log(2 * M_PI) + 1); + } + else { + contrast[k] = D_INF; + } + } + } + } + } + } + + else { + for (j = time - 1;j >= nb_segment;j--) { + if (contrast[j] != D_INF) { +// if (residual[0][j] > 0.) { + if (residual[0][j] > nb_sequence * (time - j) * ROUNDOFF_ERROR) { + contrast[j] -= ((double)(nb_sequence * (time - j)) / 2.) 
* (logl(residual[0][j] / + (nb_sequence * (time - j))) + log(2 * M_PI) + 1); + } + else { + contrast[j] = D_INF; + } + } + } + } + } + } + +# ifdef DEBUG + for (i = time - 1;i >= nb_segment;i--) { + cout << contrast[i] << " "; + } + cout << endl; +# endif + + delete [] frequency; + delete [] inf_bound_parameter; + + if (residual) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { + delete [] residual[i]; + } + } + else { + delete [] residual[0]; + } + delete [] residual; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the contrast functions within a backward recursion. + * + * \param[in] time time instant, + * \param[in] index sequence index, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] factorial log factorials for Poisson models, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] binomial_coeff log binomial coefficients for negative binomial models, + * \param[in] seq_mean sequence means for Gaussian change in the variance models or + * stationary piecewise autoregressive models, + * \param[in] seq_index_parameter index parameters, + * \param[in] hyperparam hyperparameters for Bayesian models, + * \param[in] rank ranks for ordinal variables, + * \param[in] contrast contrast functions. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::backward_contrast(int time , int index , segment_model *model_type , bool common_contrast , + double ***factorial , double *shape_parameter , double ***binomial_coeff , + double **seq_mean , int *seq_index_parameter , double **hyperparam , + double **rank , long double *contrast) const + +{ + int i , j , k , m; + int max_nb_value , count , *frequency , *inf_bound_parameter; + double sum , factorial_sum , proba , binomial_coeff_sum , diff , index_parameter_sum , + index_parameter_diff , shifted_diff , range_diff , mean , buff; + long double index_parameter_square_sum , square_sum , mix_square_sum , shifted_square_sum , + autocovariance , prior_contrast , square_sum_term[3] , **residual; + + + // initializations + + max_nb_value = 0; + inf_bound_parameter = new int[nb_variable]; + residual = NULL; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == CATEGORICAL_CHANGE) && (marginal_distribution[i]->nb_value > max_nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + } + + if (((i == 1) && ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE))) || + (((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE)) && (!residual))) { + residual = new long double*[MAX(nb_sequence , 2)]; + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == 
I_DEFAULT) || (index == j)) { + residual[j] = new long double[length[j]]; + } + else { + residual[j] = NULL; + } + } + } + else { + residual[0] = new long double[length[0]]; + } + } + } + + if (max_nb_value > 0) { + frequency = new int[max_nb_value]; + } + else { + frequency = NULL; + } + + // computation of segment contrast functions (log-likelihoods or sum of squared deviations) + + for (i = time;i < length[index == I_DEFAULT ? 0 : index];i++) { + contrast[i] = 0.; + } + + for (i = 1;i < nb_variable;i++) { + if (model_type[i - 1] == CATEGORICAL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + frequency[k] = 0; + } + + frequency[int_sequence[j][i][time]]++; +// sum = 0.; + + for (k = time + 1;k < length[j];k++) { + frequency[int_sequence[j][i][k]]++; + if (contrast[k] != D_INF) { + for (m = 0;m < marginal_distribution[i]->nb_value;m++) { + if (frequency[m] > 0) { + contrast[k] += frequency[m] * log((double)frequency[m] / (double)(k - time + 1)); + } + } + } + +/* sum += (k - time) * log((double)(k - time) / (double)(k - time + 1)) + + log((double)(frequency[int_sequence[j][i][k]] + 1) / (double)(k - time + 1)); + if (frequency[int_sequence[j][i][k]] > 0) { + sum -= frequency[int_sequence[j][i][k]] * + log((double)frequency[int_sequence[j][i][k]] / (double)(frequency[int_sequence[j][i][k]] + 1)); + } + frequency[int_sequence[j][i][k]]++; + + if (contrast[k] != D_INF) { + contrast[k] += sum; + } */ + } + } + } + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + frequency[j] = 0; + } + + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + frequency[int_sequence[k][i][j]]++; + } + + if (contrast[j] != D_INF) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + if (frequency[k] > 0) { + contrast[j] += frequency[k] * log((double)frequency[k] / 
(double)(nb_sequence * (j - time + 1))); + } + } + } + } + } + } + + else if (model_type[i - 1] == POISSON_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = 0.; + factorial_sum = 0.; + + for (k = time;k < length[j];k++) { + sum += int_sequence[j][i][k]; + factorial_sum += factorial[i][j][k]; + if ((contrast[k] != D_INF) && (sum > 0.)) { + contrast[k] += sum * (log(sum / (k - time + 1)) - 1) - factorial_sum; + } + } + } + } + } + + else { + sum = 0.; + factorial_sum = 0.; + + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + factorial_sum += factorial[i][k][j]; + } + + if ((contrast[j] != D_INF) && (sum > 0.)) { + contrast[j] += sum * (log(sum / (nb_sequence * (j - time + 1))) - 1) - factorial_sum; + } + } + } + } + + else if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = 0.; + binomial_coeff_sum = 0.; + + for (k = time;k < length[j];k++) { + sum += int_sequence[j][i][k]; + binomial_coeff_sum += binomial_coeff[i][j][k]; + + if (contrast[k] != D_INF) { + if (sum > inf_bound_parameter[i - 1] * (k - time + 1)) { + proba = shape_parameter[i - 1] * (k - time + 1) / + ((shape_parameter[i - 1] - inf_bound_parameter[i - 1]) * (k - time + 1) + sum); + contrast[k] += binomial_coeff_sum + shape_parameter[i - 1] * (k - time + 1) * log(proba) + + (sum - inf_bound_parameter[i - 1] * (k - time + 1)) * log(1. 
- proba); + } + else { + contrast[k] = D_INF; + } + } + } + } + } + } + + else { + sum = 0.; + binomial_coeff_sum = 0.; + + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + binomial_coeff_sum += binomial_coeff[i][k][j]; + } + + if (contrast[j] != D_INF) { + if (sum > inf_bound_parameter[i - 1] * nb_sequence * (j - time + 1)) { + proba = shape_parameter[i - 1] * nb_sequence * (j - time + 1) / + ((shape_parameter[i - 1] - inf_bound_parameter[i - 1]) * nb_sequence * (j - time + 1) + sum); + contrast[j] += binomial_coeff_sum + shape_parameter[i - 1] * nb_sequence * (j - time + 1) * log(proba) + + (sum - inf_bound_parameter[i - 1] * nb_sequence * (j - time + 1)) * log(1. - proba); + } + else { + contrast[j] = D_INF; + } + } + } + } + } + + else if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[0] == MEAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + sum = int_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time + 1;k < length[j];k++) { + diff = int_sequence[j][i][k] - sum / (k - time); + square_sum += ((double)(k - time) / (double)(k - time + 1)) * diff * diff; + sum += int_sequence[j][i][k]; + residual[j][k] = square_sum; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + sum = real_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time + 1;k < length[j];k++) { + diff = real_sequence[j][i][k] - sum / (k - time); + square_sum += ((double)(k - time) / (double)(k - time + 1)) * diff * diff; + sum += real_sequence[j][i][k]; + residual[j][k] = square_sum; + } + } + } + } + } + + else { + square_sum = 0.; + sum = 0.; + count = 0; + + if (type[i] != REAL_VALUE) { + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = 
int_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += int_sequence[k][i][j]; + } + residual[0][j] = square_sum; + } + } + + else { + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = real_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += real_sequence[k][i][j]; + } + residual[0][j] = square_sum; + } + } + } + } + + else if (model_type[i - 1] == VARIANCE_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + for (k = time;k < length[j];k++) { + diff = int_sequence[j][i][k] - seq_mean[i][j]; + square_sum += diff * diff; + residual[j][k] = square_sum; + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + for (k = time;k < length[j];k++) { + diff = real_sequence[j][i][k] - seq_mean[i][j]; + square_sum += diff * diff; + residual[j][k] = square_sum; + } + } + } + } + } + + else { + square_sum = 0.; + + if (type[i] != REAL_VALUE) { + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j] - seq_mean[i][0]; + square_sum += diff * diff; + } + residual[0][j] = square_sum; + } + } + + else { + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j] - seq_mean[i][0]; + square_sum += diff * diff; + } + residual[0][j] = square_sum; + } + } + } + } + + else if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + sum = rank[i][int_sequence[j][i][time]]; + residual[j][time] = 0.; + + for (k = time + 1;k < length[j];k++) { + diff = 
rank[i][int_sequence[j][i][k]] - sum / (k - time); + square_sum += ((double)(k - time) / (double)(k - time + 1)) * diff * diff; + sum += rank[i][int_sequence[j][i][k]]; + residual[j][k] = square_sum; + + if (residual[j][k] == 0.) { + residual[j][k] = (k - time + 1) * MIN_RANK_SQUARE_SUM; + } + } + } + } + } + + else { + square_sum = 0.; + sum = 0.; + count = 0; + + for (j = time;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + if (count > 0) { + diff = rank[i][int_sequence[k][i][j]] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + } + count++; + sum += rank[i][int_sequence[k][i][j]]; + } + residual[0][j] = square_sum; + + if (residual[0][j] == 0.) { + residual[0][j] = count * MIN_RANK_SQUARE_SUM; + } + } + } + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[time]; + sum = int_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time + 1;k < length[j];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / (k - time); + index_parameter_square_sum += ((double)(k - time) / (double)(k - time + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[j][i][k] - sum / (k - time); + square_sum += ((double)(k - time) / (double)(k - time + 1)) * diff * diff; + mix_square_sum += ((double)(k - time) / (double)(k - time + 1)) * index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[k]; + sum += int_sequence[j][i][k]; + + if ((k > time + 1) && (index_parameter_square_sum > 0.)) { + residual[j][k] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j][k] = 0.; + } + } + } + } + } 
+ + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + index_parameter_square_sum = 0.; + square_sum = 0.; + mix_square_sum = 0.; + index_parameter_sum = seq_index_parameter[time]; + sum = real_sequence[j][i][time]; + residual[j][time] = 0.; + + for (k = time + 1;k < length[j];k++) { + index_parameter_diff = seq_index_parameter[k] - index_parameter_sum / (k - time); + index_parameter_square_sum += ((double)(k - time) / (double)(k - time + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[j][i][k] - sum / (k - time); + square_sum += ((double)(k - time) / (double)(k - time + 1)) * diff * diff; + mix_square_sum += ((double)(k - time) / (double)(k - time + 1)) * index_parameter_diff * diff; + index_parameter_sum += seq_index_parameter[k]; + sum += real_sequence[j][i][k]; + + if ((k > time + 1) && (index_parameter_square_sum > 0.)) { + residual[j][k] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[j][k] = 0.; + } + } + } + } + } + } + + else { + index_parameter_square_sum = 0.; + index_parameter_sum = nb_sequence * seq_index_parameter[time]; + square_sum = 0.; + mix_square_sum = 0.; + count = 1; + residual[0][time] = 0.; + + if (type[i] != REAL_VALUE) { + sum = int_sequence[0][i][time]; + for (j = 1;j < nb_sequence;j++) { + diff = int_sequence[j][i][time] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += int_sequence[j][i][time]; + } + + for (j = time + 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = int_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * 
diff; + count++; + index_parameter_sum += seq_index_parameter[j]; + sum += int_sequence[k][i][j]; + } + + if (index_parameter_square_sum > 0.) { + residual[0][j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0][j] = 0.; + } + } + } + + else { + sum = real_sequence[0][i][time]; + for (j = 1;j < nb_sequence;j++) { + diff = real_sequence[j][i][time] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + count++; + sum += real_sequence[j][i][time]; + } + + for (j = time + 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + index_parameter_diff = seq_index_parameter[j] - index_parameter_sum / count; + index_parameter_square_sum += ((double)count / (double)(count + 1)) * + index_parameter_diff * index_parameter_diff; + diff = real_sequence[k][i][j] - sum / count; + square_sum += ((double)count / (double)(count + 1)) * diff * diff; + mix_square_sum += ((double)count / (double)(count + 1)) * index_parameter_diff * diff; + count++; + index_parameter_sum += seq_index_parameter[j]; + sum += real_sequence[k][i][j]; + } + + if (index_parameter_square_sum > 0.) 
{ + residual[0][j] = square_sum - mix_square_sum * mix_square_sum / index_parameter_square_sum; + } + else { + residual[0][j] = 0.; + } + } + } + } + } + + else if (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = int_sequence[j][i][time]; + + if (time + 1 < length[j]) { + diff = int_sequence[j][i][time + 1] - int_sequence[j][i][time]; + square_sum = diff * diff / 4.; + shifted_square_sum = square_sum; + autocovariance = -square_sum; + sum += int_sequence[j][i][time + 1]; + residual[j][time + 1] = 0.; + } + + for (k = time + 2;k < length[j];k++) { + diff = int_sequence[j][i][k] - sum / (k - time); + shifted_diff = int_sequence[j][i][k - 1] - sum / (k - time); + square_sum += (1. + (double)(k - time) / ((double)(k - time + 1) * (k - time + 1))) * diff * diff - + (2. / (double)(k - time + 1)) * diff * (int_sequence[j][i][k] - int_sequence[j][i][time]); + shifted_square_sum += shifted_diff * shifted_diff + + ((double)(k - time) / ((double)(k - time + 1) * (k - time + 1))) * diff * diff; + autocovariance += diff * shifted_diff + + ((double)(k - time) / ((double)(k - time + 1) * (k - time + 1))) * diff * diff - + (1. / (double)(k - time + 1)) * diff * (int_sequence[j][i][k] - int_sequence[j][i][time]); + sum += int_sequence[j][i][k]; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + sum = real_sequence[j][i][time]; + + if (time + 1 < length[j]) { + diff = real_sequence[j][i][time + 1] - real_sequence[j][i][time]; + square_sum = diff * diff / 4.; + shifted_square_sum = square_sum; + autocovariance = -square_sum; + sum += real_sequence[j][i][time + 1]; + residual[j][time + 1] = 0.; + } + + for (k = time + 2;k < length[j];k++) { + diff = real_sequence[j][i][k] - sum / (k - time); + shifted_diff = real_sequence[j][i][k - 1] - sum / (k - time); + square_sum += (1. + (double)(k - time) / ((double)(k - time + 1) * (k - time + 1))) * diff * diff - + (2. / (double)(k - time + 1)) * diff * (real_sequence[j][i][k] - real_sequence[j][i][time]); + shifted_square_sum += shifted_diff * shifted_diff + + ((double)(k - time) / ((double)(k - time + 1) * (k - time + 1))) * diff * diff; + autocovariance += diff * shifted_diff + + ((double)(k - time) / ((double)(k - time + 1) * (k - time + 1))) * diff * diff - + (1. / (double)(k - time + 1)) * diff * (real_sequence[j][i][k] - real_sequence[j][i][time]); + sum += real_sequence[j][i][k]; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + } + + else { + if (type[i] != REAL_VALUE) { + sum = 0.; + for (j = 0;j < nb_sequence;j++) { + sum += int_sequence[j][i][time]; + } + + if (time + 1 < length[0]) { + for (j = 0;j < nb_sequence;j++) { + sum += int_sequence[j][i][time + 1]; + } + mean = sum / (nb_sequence * 2); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (j = 0;j < nb_sequence;j++) { + diff = int_sequence[j][i][time + 1] - mean; + shifted_diff = int_sequence[j][i][time] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][time + 1] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][time + 1] -= autocovariance * autocovariance / shifted_square_sum; + } + } + + for (j = time + 2;j < length[0];j++) { + mean = sum / (nb_sequence * (j - time)); + square_sum_term[0] = 0.; + square_sum_term[1] = 0.; + square_sum_term[2] = 0.; + diff = 0.; + range_diff = 0.; + + for (k = 0;k < nb_sequence;k++) { + sum += int_sequence[k][i][j]; + buff = int_sequence[k][i][j] - mean; + diff += buff; + square_sum_term[0] += buff * buff; + shifted_diff = int_sequence[k][i][j - 1] - mean; + square_sum_term[1] += shifted_diff * shifted_diff; + square_sum_term[2] += buff * shifted_diff; + range_diff += int_sequence[k][i][j] - int_sequence[k][i][time]; + } + + square_sum += square_sum_term[0] + + ((double)(j - time) / ((double)nb_sequence * (j - time + 1) * (j - time + 1))) * diff * diff - + (2. / ((double)nb_sequence * (j - time + 1))) * diff * range_diff; + shifted_square_sum += square_sum_term[1] + + ((double)(j - time) / ((double)nb_sequence * (j - time + 1) * (j - time + 1))) * diff * diff; + autocovariance += square_sum_term[2] + + ((double)(j - time) / ((double)nb_sequence * (j - time + 1) * (j - time + 1))) * diff * diff - + (1. 
/ ((double)nb_sequence * (j - time + 1))) * diff * range_diff; + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + + else { + sum = 0.; + for (j = 0;j < nb_sequence;j++) { + sum += real_sequence[j][i][time]; + } + + if (time + 1 < length[0]) { + for (j = 0;j < nb_sequence;j++) { + sum += real_sequence[j][i][time + 1]; + } + mean = sum / (nb_sequence * 2); + + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + for (j = 0;j < nb_sequence;j++) { + diff = real_sequence[j][i][time + 1] - mean; + shifted_diff = real_sequence[j][i][time] - mean; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][time + 1] = square_sum ; + if (shifted_square_sum > 0.) { + residual[0][time + 1] -= autocovariance * autocovariance / shifted_square_sum; + } + } + + for (j = time + 2;j < length[0];j++) { + mean = sum / (nb_sequence * (j - time)); + square_sum_term[0] = 0.; + square_sum_term[1] = 0.; + square_sum_term[2] = 0.; + diff = 0.; + range_diff = 0.; + + for (k = 0;k < nb_sequence;k++) { + sum += real_sequence[k][i][j]; + buff = real_sequence[k][i][j] - mean; + diff += buff; + square_sum_term[0] += buff * buff; + shifted_diff = real_sequence[k][i][j - 1] - mean; + square_sum_term[1] += shifted_diff * shifted_diff; + square_sum_term[2] += buff * shifted_diff; + range_diff += real_sequence[k][i][j] - real_sequence[k][i][time]; + } + + square_sum += square_sum_term[0] + + ((double)(j - time) / ((double)nb_sequence * (j - time + 1) * (j - time + 1))) * diff * diff - + (2. 
/ ((double)nb_sequence * (j - time + 1))) * diff * range_diff; + shifted_square_sum += square_sum_term[1] + + ((double)(j - time) / ((double)nb_sequence * (j - time + 1) * (j - time + 1))) * diff * diff; + autocovariance += square_sum_term[2] + + ((double)(j - time) / ((double)nb_sequence * (j - time + 1) * (j - time + 1))) * diff * diff - + (1. / ((double)nb_sequence * (j - time + 1))) * diff * range_diff; + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + else if (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + + for (k = time + 1;k < length[j];k++) { + diff = int_sequence[j][i][k] - seq_mean[i][j]; + shifted_diff = int_sequence[j][i][k - 1] - seq_mean[i][j]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) { + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + + for (k = time + 1;k < length[j];k++) { + diff = real_sequence[j][i][k] - seq_mean[i][j]; + shifted_diff = real_sequence[j][i][k - 1] - seq_mean[i][j]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + + residual[j][k] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[j][k] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + } + + else { + square_sum = 0.; + shifted_square_sum = 0.; + autocovariance = 0.; + + if (type[i] != REAL_VALUE) { + for (j = time + 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = int_sequence[k][i][j] - seq_mean[i][0]; + shifted_diff = int_sequence[k][i][j - 1] - seq_mean[i][0]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) { + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + + else { + for (j = time + 1;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + diff = real_sequence[k][i][j] - seq_mean[i][0]; + shifted_diff = real_sequence[k][i][j - 1] - seq_mean[i][0]; + square_sum += diff * diff; + shifted_square_sum += shifted_diff * shifted_diff; + autocovariance += diff * shifted_diff; + } + + residual[0][j] = square_sum; + if (shifted_square_sum > 0.) 
{ + residual[0][j] -= autocovariance * autocovariance / shifted_square_sum; + } + } + } + } + } + + else if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + prior_contrast = -lgamma(hyperparam[i][0]) + hyperparam[i][0] * log(hyperparam[i][1]); + + sum = 0.; + factorial_sum = 0.; + for (j = time;j < length[index];j++) { + sum += int_sequence[index][i][j]; + factorial_sum += factorial[i][index][j]; + if (contrast[j] != D_INF) { + contrast[j] += prior_contrast - factorial_sum + lgamma(hyperparam[i][0] + sum) - + (hyperparam[i][0] + sum) * log(hyperparam[i][1] + j - time + 1); + } + } + } + + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + prior_contrast = log(hyperparam[i][1]) / 2 - lgamma(hyperparam[i][2] / 2) + + hyperparam[i][2] * log(hyperparam[i][3] / 2) / 2; + + if (type[i] != REAL_VALUE) { + square_sum = 0.; + sum = int_sequence[index][i][time]; + if (contrast[time] != D_INF) { + diff = hyperparam[i][0] - sum; + contrast[time] += prior_contrast - log(2 * M_PI) / 2 - + log(hyperparam[i][1] + 1) / 2 + lgamma((hyperparam[i][2] + 1) / 2) - + (hyperparam[i][2] + 1) * + log((hyperparam[i][3] + hyperparam[i][1] * + diff * diff / (hyperparam[i][1] + 1)) / 2) / 2; + } + + for (j = time + 1;j < length[index];j++) { + diff = int_sequence[index][i][j] - sum / (j - time); + square_sum += ((double)(j - time) / (double)(j - time + 1)) * diff * diff; + sum += int_sequence[index][i][j]; + if (contrast[j] != D_INF) { + diff = hyperparam[i][0] - sum / (j - time + 1); + contrast[j] += prior_contrast - (j - time + 1) * log(2 * M_PI) / 2 - + log(hyperparam[i][1] + j - time + 1) / 2 + + lgamma((hyperparam[i][2] + j - time + 1) / 2) - + (hyperparam[i][2] + j - time + 1) * + logl((hyperparam[i][3] + square_sum + hyperparam[i][1] * (j - time + 1) * + diff * diff / (hyperparam[i][1] + j - time + 1)) / 2) / 2; + } + } + } + + else { + square_sum = 0.; + sum = real_sequence[index][i][time]; + if (contrast[time] != D_INF) { + diff = hyperparam[i][0] - sum; + contrast[time] += 
prior_contrast - log(2 * M_PI) / 2 - + log(hyperparam[i][1] + 1) / 2 + lgamma((hyperparam[i][2] + 1) / 2) - + (hyperparam[i][2] + 1) * + log((hyperparam[i][3] + hyperparam[i][1] * + diff * diff / (hyperparam[i][1] + 1)) / 2) / 2; + } + + for (j = time + 1;j < length[index];j++) { + diff = real_sequence[index][i][j] - sum / (j - time); + square_sum += ((double)(j - time) / (double)(j - time + 1)) * diff * diff; + sum += real_sequence[index][i][j]; + if (contrast[j] != D_INF) { + diff = hyperparam[i][0] - sum / (j - time + 1); + contrast[j] += prior_contrast - (j - time + 1) * log(2 * M_PI) / 2 - + log(hyperparam[i][1] + j - time + 1) / 2 + + lgamma((hyperparam[i][2] + j - time + 1) / 2) - + (hyperparam[i][2] + j - time + 1) * + logl((hyperparam[i][3] + square_sum + hyperparam[i][1] * (j - time + 1) * + diff * diff / (hyperparam[i][1] + j - time + 1)) / 2) / 2; + } + } + } + } + + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = time + 1;k < length[j];k++) { + contrast[k] -= residual[j][k]; + } + } + } + } + + else { + for (j = time;j < length[0];j++) { + contrast[j] -= residual[0][j]; + } + } + } + + else if ((model_type[i - 1] == GAUSSIAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i - 1] == LINEAR_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = time;k < length[j];k++) { + if (contrast[k] != D_INF) { +// if (residual[j][k] > 0.) { + if (residual[j][k] > (k - time + 1) * ROUNDOFF_ERROR) { + contrast[k] -= ((double)(k - time + 1) / 2.) * (logl(residual[j][k] / + (k - time + 1)) + log(2 * M_PI) + 1); +/* contrast[k] -= ((double)(k - time + 1) / 2.) 
* (logl(residual[j][k] / + (k - time)) + log(2 * M_PI)) + (double)(k - time) / 2.; */ + } + else { + contrast[k] = D_INF; + } + } + } + } + } + } + + else { + for (j = time;j < length[0];j++) { + if (contrast[j] != D_INF) { +// if (residual[0][j] > 0.) { + if (residual[0][j] > nb_sequence * (j - time + 1) * ROUNDOFF_ERROR) { + contrast[j] -= ((double)(nb_sequence * (j - time + 1)) / 2.) * (logl(residual[0][j] / + (nb_sequence * (j - time + 1))) + log(2 * M_PI) + 1); + } + else { + contrast[j] = D_INF; + } + } + } + } + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + for (k = time + 1;k < length[j];k++) { + if (contrast[k] != D_INF) { +// if (residual[j][k] > 0.) { + if (residual[j][k] > (k - time) * ROUNDOFF_ERROR) { + contrast[k] -= ((double)(k - time) / 2.) * (logl(residual[j][k] / + (k - time)) + log(2 * M_PI) + 1); + } + else { + contrast[k] = D_INF; + } + } + } + } + } + } + + else { + for (j = time + 1;j < length[0];j++) { + if (contrast[j] != D_INF) { +// if (residual[0][j] > 0.) { + if (residual[0][j] > nb_sequence * (j - time) * ROUNDOFF_ERROR) { + contrast[j] -= ((double)(nb_sequence * (j - time)) / 2.) * (logl(residual[0][j] / + (nb_sequence * (j - time))) + log(2 * M_PI) + 1); + } + else { + contrast[j] = D_INF; + } + } + } + } + } + } + +# ifdef DEBUG + for (i = time;i < length[index == I_DEFAULT ? 
0 : index];i++) { + cout << contrast[i] << " "; + } + cout << endl; +# endif + + delete [] frequency; + delete [] inf_bound_parameter; + + if (residual) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_sequence;i++) { + delete [] residual[i]; + } + } + else { + delete [] residual[0]; + } + delete [] residual; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Optimal segmentation of a single sequence or a sample of sequences. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] rank ranks (for ordinal variables), + * \param[out] isegmentation_likelihood pointer on the segmentation log-likelihoods (optional, may be null), + * \param[in] nb_parameter pointer on the number of free parameters of models, + * \param[in] segment_penalty pointer on the penalties related to segment lengths (for mBIC). + * + * \return log-likelihood of the optimal segmentation. 
+ */ +/*--------------------------------------------------------------*/ + +double Sequences::segmentation(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , double **rank , + double *isegmentation_likelihood , int *nb_parameter , + double *segment_penalty) + +{ + bool *used_output; + int i , j , k , m , n , p; + int max_nb_value , seq_length , count , *inf_bound_parameter , *seq_index_parameter , + *psegment , **optimal_length; + double buff , segmentation_likelihood , **seq_mean , **hyperparam , **forward , + ***factorial , ***binomial_coeff; + long double *contrast; + + + max_nb_value = 0; + factorial = new double**[nb_variable]; + inf_bound_parameter = new int[nb_variable]; + binomial_coeff = new double**[nb_variable]; + seq_mean = new double*[nb_variable]; + seq_index_parameter = NULL; + hyperparam = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == CATEGORICAL_CHANGE) && (marginal_distribution[i]->nb_value > max_nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + + // computation of log of factorials for Poisson models + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + factorial[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + factorial[i][j][k] = log_factorial(int_sequence[j][i][k]); + } + } + else { + factorial[i][j] = NULL; + } + } + } + + else { + factorial[i] = NULL; + } + + // computation of log of binomial coefficients for negative binomial models + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + 
} + + binomial_coeff[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + binomial_coeff[i][j][k] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + } + else { + binomial_coeff[i][j] = NULL; + } + } + } + + else { + binomial_coeff[i] = NULL; + } + + // computation of sequence means for Gaussian change in the variance models or + // stationary piecewise autoregressive models + + if ((model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + if (((i == 1) && (model_type[0] == INTERCEPT_SLOPE_CHANGE)) || + ((model_type[i - 1] == LINEAR_MODEL_CHANGE) && (!seq_index_parameter))) { + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < 
seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + } + + // computation of hyperparameters for Bayesian Poisson and Gaussian models + + if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + hyperparam[i] = new double[2]; + gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + hyperparam[i] = new double[4]; + gaussian_gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else { + hyperparam[i] = NULL; + } + } + + seq_length = length[index == I_DEFAULT ? 0 : index]; + contrast = new long double[seq_length]; + + forward = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward[i] = new double[nb_segment]; + } + + optimal_length = new int*[seq_length]; + for (i = 0;i < seq_length;i++) { + optimal_length[i] = new int[nb_segment]; + } + + if ((nb_parameter) && (max_nb_value > 0)) { + used_output = new bool[max_nb_value]; + } + else { + used_output = NULL; + } + + // forward recurrence + + for (i = 0;i < seq_length;i++) { + + // computation of segment contrast functions (log-likelihoods or sum of squared deviations) + + forward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + for (j = 0;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { +// for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? 
nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j] = contrast[0]; + if (forward[i][j] != D_INF) { + optimal_length[i][j] = i + 1; + } + } + + else { + forward[i][j] = D_INF; + for (k = i;k >= j;k--) { + if ((contrast[k] != D_INF) && (forward[k - 1][j - 1] != D_INF)) { + buff = contrast[k] + forward[k - 1][j - 1]; + if (buff > forward[i][j]) { + forward[i][j] = buff; + optimal_length[i][j] = i - k + 1; + } + } + } + } + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + if (isegmentation_likelihood) { + for (i = 0;i < nb_segment;i++) { + isegmentation_likelihood[i] = forward[seq_length - 1][i]; + } + } + + segmentation_likelihood = forward[seq_length - 1][nb_segment - 1]; + } + + else { + count = (index == I_DEFAULT ? nb_sequence : 1); + + if (isegmentation_likelihood) { + for (i = 0;i < nb_segment;i++) { + if (forward[seq_length - 1][i] < 0.) { + isegmentation_likelihood[i] = -((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][i] / + (count * seq_length)) + log(2 * M_PI) + 1); +/* isegmentation_likelihood[i] = -(((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][i] / + (count * (seq_length - nb_segment))) + log(2 * M_PI)) + + (double)(count * (seq_length - nb_segment)) / 2.); */ + } + else { + isegmentation_likelihood[i] = D_INF; + } + } + } + + if (forward[seq_length - 1][nb_segment - 1] < 0.) { + segmentation_likelihood = -((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][nb_segment - 1] / + (count * seq_length)) + log(2 * M_PI) + 1); +/* segmentation_likelihood = -(((double)(count * seq_length) / 2.) 
* + (log(-forward[seq_length - 1][nb_segment - 1] / + (count * (seq_length - nb_segment))) + log(2 * M_PI)) + + (double)(count * (seq_length - nb_segment)) / 2.); */ + } + else { + segmentation_likelihood = D_INF; + } + } + + // computation of the penalty term related to the change-point distribution (modified BIC) + + if (segment_penalty) { + +# ifdef DEBUG + int cumul_segment_length; + cout << "\n"; +# endif + + for (i = 0;i < nb_segment;i++) { + segment_penalty[i] = 0.; + j = seq_length - 1; + +# ifdef DEBUG + cumul_segment_length = 0; +# endif + + for (k = i;k >= 0;k--) { + +# ifdef DEBUG + cout << optimal_length[j][k] << " "; + cumul_segment_length += optimal_length[j][k]; +# endif + + segment_penalty[i] += log((double)optimal_length[j][k]); + j -= optimal_length[j][k]; + } + +# ifdef DEBUG + cout << "| " << segment_penalty[i] << endl; + if (cumul_segment_length != seq_length) { + cout << "\nERROR: " << i << " " << cumul_segment_length << " | " << seq_length << endl; + } +# endif + + } + } + + // computation of the number of free parameters + + if (nb_parameter) { + for (i = 0;i < nb_segment;i++) { +// nb_parameter[i] = 0; + nb_parameter[i] = i; + + if (model_type[0] == MEAN_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += i + 2; + } + else { + nb_parameter[i] += nb_sequence * (i + 1) + 1; + } + } + + else if (model_type[0] == INTERCEPT_SLOPE_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += (i + 1) * 2 + 1; + } + else { + nb_parameter[i] += nb_sequence * (i + 1) * 2 + 1; + } + } + + else { + for (j = 1;j < nb_variable;j++) { + if (model_type[j - 1] == CATEGORICAL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (k = 0;k < nb_sequence;k++) { + if ((index == I_DEFAULT) || (index == k)) { + m = length[k] - 1; + + for (n = i;n >= 0;n--) { + for (p = 0;p < marginal_distribution[j]->nb_value;p++) { + used_output[p] = false; + } + nb_parameter[i]--; + + for (p = m;p > m - 
optimal_length[m][n];p--) { + if (!used_output[int_sequence[k][j][p]]) { + nb_parameter[i]++; + used_output[int_sequence[k][j][p]] = true; + } + } + + m -= optimal_length[m][n]; + } + } + } + } + + else { + k = length[0] - 1; + + for (m = i;m >= 0;m--) { + for (n = 0;n < marginal_distribution[j]->nb_value;n++) { + used_output[n] = false; + } + nb_parameter[i]--; + + for (n = k;n > k - optimal_length[k][m];n--) { + for (p = 0;p < nb_sequence;p++) { + if (!used_output[int_sequence[p][j][n]]) { + nb_parameter[i]++; + used_output[int_sequence[p][j][n]] = true; + } + } + } + + k -= optimal_length[k][m]; + } + } + } + + else if ((model_type[j - 1] == POISSON_CHANGE) || (model_type[j - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[j - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[j - 1] == BAYESIAN_POISSON_CHANGE)) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += i + 1; + } + else { + nb_parameter[i] += nb_sequence * (i + 1); + } + } + + else if ((model_type[j - 1] == GAUSSIAN_CHANGE) || (model_type[j - 1] == ORDINAL_GAUSSIAN_CHANGE) || + (model_type[j - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += 2 * (i + 1); + } + else { + nb_parameter[i] += nb_sequence * 2 * (i + 1); + } + } + + else if (model_type[j - 1] == VARIANCE_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += i + 2; + } + else { + nb_parameter[i] += nb_sequence * (i + 2); + } + } + + else if ((model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += 3 * (i + 1); + } + else { + nb_parameter[i] += nb_sequence * 3 * (i + 1); + } + } + + else if (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (common_contrast)) { + nb_parameter[i] += 2 * (i + 1) + 1; + } + else { + nb_parameter[i] += nb_sequence * (2 * (i + 1) + 1); + } + } + } + } + 
} + } + + // restoration + + i = seq_length - 1; + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0] + i; + + for (j = nb_segment - 1;j >= 0;j--) { +// for (k = 0;k < optimal_length[i][j];m++) { + for (k = i;k > i - optimal_length[i][j];k--) { + *psegment-- = j; + } + i -= optimal_length[i][j]; + } + + if (index == I_DEFAULT) { + for (i = 1;i < nb_sequence;i++) { + for (j = 0;j < length[0];j++) { + int_sequence[i][0][j] = int_sequence[0][0][j]; + } + } + } + + min_value[0] = 0; + max_value[0] = nb_segment - 1; + delete marginal_distribution[0]; + build_marginal_frequency_distribution(0); + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] factorial[i][j]; + } + delete [] factorial[i]; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] binomial_coeff[i][j]; + } + delete [] binomial_coeff[i]; + } + + delete [] seq_mean[i]; + delete [] hyperparam[i]; + } + delete [] factorial; + delete [] inf_bound_parameter; + delete [] binomial_coeff; + delete [] seq_mean; + delete [] hyperparam; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] contrast; + + for (i = 0;i < seq_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 0;i < seq_length;i++) { + delete [] optimal_length[i]; + } + delete [] optimal_length; + + delete [] used_output; + + return segmentation_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Optimal segmentation of a single sequence or a sample of sequences. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the segmentation, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output (sequence or residuals). + * \param[in] continuity flag continuous piecewise linear function. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation(StatError &error , ostream *os , int iidentifier , + int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + sequence_type output , bool continuity) const + +{ + bool status = true; + int i , j; + int index , nb_parameter; + double segmentation_likelihood , segment_penalty , penalized_likelihood , **rank; + FrequencyDistribution *marginal; + Sequences *seq , *iseq , *oseq; + + + oseq = NULL; + error.init(); + +/* if (((index_param_type == TIME) && (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + if (index_param_type == POSITION) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]); + } */ + + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == POISSON_CHANGE) || + (model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == BAYESIAN_POISSON_CHANGE)) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << 
STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (((model_type[i] != NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 0)) || + ((model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 1))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else if (model_type[i] == CATEGORICAL_CHANGE) { + if ((marginal_distribution[i]->nb_value < 2) || + (marginal_distribution[i]->nb_value > NB_OUTPUT)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + + if (((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == ORDINAL_GAUSSIAN_CHANGE)) && + ((output == SUBTRACTION_RESIDUAL) || (output == ABSOLUTE_RESIDUAL) || (output == DIVISION_RESIDUAL))) { + status = false; + error.update(SEQ_error[SEQR_FORBIDDEN_OUTPUT]); + } + } + + else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream 
error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (index_param_type != IMPLICIT_TYPE) && (index_interval->variance > 0.)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + if (((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || + (model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (output == SEQUENCE_SAMPLE)) { + status = false; + error.update(SEQ_error[SEQR_FORBIDDEN_OUTPUT]); + } + } + + if (iidentifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (iidentifier == identifier[i]) { + index = i; + break; + } + } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + } + + else { + index = I_DEFAULT; + if (length_distribution->variance > 0.) { + status = false; + error.update(SEQ_error[SEQR_VARIABLE_SEQUENCE_LENGTH]); + } + } + + if (((index != I_DEFAULT) || (!common_contrast)) && (output == SEQUENCE_SAMPLE)) { + status = false; + error.update(SEQ_error[SEQR_FORBIDDEN_OUTPUT]); + } + + if ((status) && ((nb_segment < 1) || (nb_segment > length[index == I_DEFAULT ? 
0 : index] / 2))) { + status = false; + error.update(SEQ_error[SEQR_NB_SEGMENT]); + } + + if (status) { + if (index != I_DEFAULT) { + iseq = new Sequences(*this , 1 , &index); + seq = new Sequences(*iseq , ADD_STATE_VARIABLE); + delete iseq; + } + else { + seq = new Sequences(*this , ADD_STATE_VARIABLE); + } + + // rank computation for ordinal variables + + rank = new double*[seq->nb_variable]; + + for (i = 1;i < seq->nb_variable;i++) { + if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + rank[i] = seq->marginal_distribution[i]->rank_computation(); + } + else { + rank[i] = NULL; + } + } + + segmentation_likelihood = seq->segmentation((index == I_DEFAULT ? index : 0) , nb_segment , model_type , + common_contrast , shape_parameter , rank); + + for (i = 1;i < seq->nb_variable;i++) { + delete [] rank[i]; + } + delete [] rank; + + if (segmentation_likelihood != D_INF) { + if (os) { + segment_penalty = 0.; + i = 0; + for (j = 1;j < seq->length[0];j++) { + if (seq->int_sequence[0][0][j] != seq->int_sequence[0][0][j - 1]) { + segment_penalty += log((double)(j - i)); + i = j; + } + } + segment_penalty += log((double)(seq->length[0] - i)); + + nb_parameter = seq->nb_parameter_computation((index == I_DEFAULT ? index : 0) , nb_segment , model_type , + common_contrast); + + penalized_likelihood = 2 * segmentation_likelihood - nb_parameter * + log((double)((seq->nb_variable - 1) * seq->length[0])) - segment_penalty; + + *os << "\n" << nb_segment << " " << (nb_segment == 1 ? SEQ_label[SEQL_SEGMENT] : SEQ_label[SEQL_SEGMENTS]) + << " 2 * " << STAT_label[STATL_LIKELIHOOD] << ": " << 2 * segmentation_likelihood << " " + << nb_parameter << " " << STAT_label[nb_parameter == 1 ? 
STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (Modified " << STAT_criterion_word[BIC] << "): " + << penalized_likelihood << endl; + } + + oseq = seq->segmentation_output(nb_segment , model_type , common_contrast , os , output , + NULL , continuity); + + if ((output == SEQUENCE) || (output == ABSOLUTE_RESIDUAL)) { + delete seq; + } + } + + else { + delete seq; + oseq = NULL; + error.update(SEQ_error[SEQR_SEGMENTATION_FAILURE]); + } + } + + return oseq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Optimal segmentation of a single sequence or a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the segmentation, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output (sequence or residuals). + * \param[in] continuity flag continuous piecewise linear function. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation(StatError &error , ostream *os , int iidentifier , + int nb_segment , vector &model_type , + bool common_contrast , vector &shape_parameter , + sequence_type output , bool continuity) const + +{ + return segmentation(error , os , iidentifier , nb_segment , model_type.data() , + common_contrast , shape_parameter.data() , output , continuity); +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/change_points3.cpp b/src/cpp/sequence_analysis/change_points3.cpp new file mode 100644 index 0000000..a040d18 --- /dev/null +++ b/src/cpp/sequence_analysis/change_points3.cpp @@ -0,0 +1,5485 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + +extern double log_factorial(int value); +extern double log_binomial_coefficient(int inf_bound , double parameter , int value); +extern int column_width(int nb_value , const long double *value); + + +#if defined (SYSTEM_IS__CYGWIN) +#define expl exp +#endif + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the inf_bound parameter of the prior segment length distribution. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals. + * + * \return inf_bound parameter. + */ +/*--------------------------------------------------------------*/ + +double Sequences::prior_segment_length_inf_bound_computation(int index , int nb_segment , segment_model *model_type , + bool common_contrast) const + +{ + int i; + int seq_length , inf_bound; + + + inf_bound = 1; + seq_length = length[index == I_DEFAULT ? 
0 : index]; + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 1;i < nb_variable;i++) { + if (((i == 1) && (model_type[0] == MEAN_CHANGE)) || ((model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE))) { + inf_bound = 2; + } + if (((i == 1) && (model_type[0] == INTERCEPT_SLOPE_CHANGE)) || ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE))) { + inf_bound = 3; + } + } + } + + else { + for (i = 1;i < nb_variable;i++) { + if (((i == 1) && (model_type[0] == INTERCEPT_SLOPE_CHANGE)) || ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE))) { + inf_bound = 2; + } + } + } + + return inf_bound; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the number of possible segmentations. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals. + * + * \return number of possible segmentations. + */ +/*--------------------------------------------------------------*/ + +double Sequences::nb_segmentation_computation(int index , int nb_segment , segment_model *model_type , + bool common_contrast) const + +{ + int i; + int inf_bound , seq_length; + double nb_segmentation; + + + inf_bound = prior_segment_length_inf_bound_computation(index , nb_segment , model_type , common_contrast); + seq_length = length[index == I_DEFAULT ? 
0 : index]; + + nb_segmentation = 1.; + for (i = 1;i < nb_segment;i++) { + nb_segmentation *= (double)(seq_length - i - (inf_bound - 1) * nb_segment) / (double)i; + } + + return nb_segmentation; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the penalty shape. + * + * \param[in] index sequence index, + * \param[in] max_nb_segment maximum number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] penalty_shape_type penalty shape type. + * + * \return penalty shape. + */ +/*--------------------------------------------------------------*/ + +double* Sequences::penalty_shape_computation(int index , int max_nb_segment , segment_model *model_type , + bool common_contrast , int penalty_shape_type) const + +{ + int i , j; + int seq_length , inf_bound; + double buff , *penalty_shape; + + + inf_bound = 1; + seq_length = length[index == I_DEFAULT ? 
0 : index]; + + if ((index != I_DEFAULT) || (!common_contrast)) { + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == GAUSSIAN_CHANGE) || (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || + (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + inf_bound = 2; + } + if ((model_type[i] == LINEAR_MODEL_CHANGE) || (model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE)) { + inf_bound = 3; + } + } + } + + else { + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == LINEAR_MODEL_CHANGE) || (model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + inf_bound = 2; + } + } + } + + penalty_shape = new double[max_nb_segment + 1]; + + switch (penalty_shape_type) { + + case 0 : { + for (i = 1;i <= max_nb_segment;i++) { + penalty_shape[i] = i - 1; + } + break; + } + + case 1 : { + buff = 1.; + for (i = 1;i <= max_nb_segment;i++) { +// penalty_shape[i] = i - 1 + log(buff); + penalty_shape[i] = log(buff); + buff *= (double)(seq_length - i) / (double)i; + } + break; + } + + case 2 : { + buff = 1.; + for (i = 1;i <= max_nb_segment;i++) { + penalty_shape[i] = log(buff); + buff *= (double)seq_length / (double)i; + } + break; + } + + case 3 : { + buff = 1.; + for (i = 1;i <= max_nb_segment;i++) { + penalty_shape[i] = log(buff); + buff *= (double)(seq_length - 1) / (double)i; + } + break; + } + + case 4 : { + for (i = 1;i <= max_nb_segment;i++) { + buff = 1.; + for (j = 1;j < i;j++) { + buff *= (double)(seq_length - j - (inf_bound - 1) * i) / (double)j; + } + penalty_shape[i] = log(buff); + } + break; + } + + case 5 : { + for (i = 1;i <= max_nb_segment;i++) { + buff = 1.; + for (j = 1;j < i;j++) { + buff *= (double)(seq_length - (inf_bound - 1) * i) / (double)j; + } + penalty_shape[i] = log(buff); + } + break; + } + } + +# ifdef MESSAGE + double buff1 , buff2 , buff3 , buff4; + + cout << "\nPenalty shapes (" << inf_bound << ")" << endl; + buff = 1.; + buff1 = 1.; + buff2 = 1.; + for (i = 1;i <= max_nb_segment;i++) { + 
buff3 = 1.; + buff4 = 1.; + for (j = 1;j < i;j++) { + buff3 *= (double)(seq_length - (inf_bound - 1) * i) / (double)j; + buff4 *= (double)(seq_length - j - (inf_bound - 1) * i) / (double)j; + } + cout << i << " " << log(buff) << " " << log(buff1) << " " << log(buff2) << " | " << log(buff) - log(buff1) << " | " << log(buff) - log(buff2) + << " || " << log(buff3) << " " << log(buff4) << " | " << log(buff3) - log(buff4) << endl; + buff *= (double)seq_length / (double)i; + buff1 *= (double)(seq_length - 1) / (double)i; + buff2 *= (double)(seq_length - i) / (double)i; + } + cout << endl; +# endif + + return penalty_shape; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of segmentation and change-point entropies. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] rank ranks (for ordinal variables), + * \param[in] likelihood pointer on the log-likelihoods of all the possibles segmentations, + * \param[in] segmentation_entropy pointer on the segmentation entropies, + * \param[in] first_order_entropy pointer on the entropies assuming first-order dependencies, + * \param[in] change_point_entropy pointer on the change-point entropies considering or not change-point ranks, + * \param[in] uniform_entropy pointer on the entropies corresponding to a uniform distribution assumption, + * \param[in] marginal_entropy pointer on the marginal entropies. + * + * \return log-likelihood of the multiple change-point model. 
+ */ +/*--------------------------------------------------------------*/ + +double Sequences::forward_backward(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + double **rank , double *likelihood , + long double *segmentation_entropy , + long double *first_order_entropy , + long double *change_point_entropy , double *uniform_entropy , + long double *marginal_entropy) const + +{ + int i , j , k , m; + int seq_length , *inf_bound_parameter , *seq_index_parameter; + double sum , buff , rlikelihood , **seq_mean , **hyperparam , **nb_segmentation_forward , + **nb_segmentation_backward , **smoothed , ***factorial , ***binomial_coeff; + long double segment_norm , sequence_norm , lbuff , *contrast , *normalized_contrast , *norm , + *backward_norm , *entropy_smoothed , *segment_predicted , **forward , + *forward_norm , **backward , **change_point , **forward_predicted_entropy , + **backward_predicted_entropy; + + + factorial = new double**[nb_variable]; + inf_bound_parameter = new int[nb_variable]; + binomial_coeff = new double**[nb_variable]; + seq_mean = new double*[nb_variable]; + seq_index_parameter = NULL; + hyperparam = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + + // computation of log of factorials for Poisson models + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + factorial[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + factorial[i][j][k] = log_factorial(int_sequence[j][i][k]); + } + } + else { + factorial[i][j] = NULL; + } + } + } + + else { + factorial[i] = NULL; + } + + // computation of log of binomial coefficients for negative binomial models + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + 
case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + + binomial_coeff[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + binomial_coeff[i][j][k] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + } + else { + binomial_coeff[i][j] = NULL; + } + } + } + + else { + binomial_coeff[i] = NULL; + } + + // computation of sequence means for Gaussian change in the variance models or + // stationary piecewise autoregressive models + + if ((model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) && (!seq_index_parameter)) { + if 
(index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + } + + // computation of hyperparameters for Bayesian Poisson and Gaussian models + + if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + hyperparam[i] = new double[2]; + gamma_hyperparameter_computation(index , i , hyperparam[i]); + +# ifdef MESSAGE + cout << "\nGamma hyperparameters: " << hyperparam[i][0] << " " << hyperparam[i][1] << endl; +# endif + + } + + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + hyperparam[i] = new double[4]; + gaussian_gamma_hyperparameter_computation(index , i , hyperparam[i]); + +# ifdef MESSAGE + cout << "\nGaussian gamma hyperparameters: " << hyperparam[i][0] << " " << hyperparam[i][1] + << " " << hyperparam[i][2] << " " << hyperparam[i][3] << endl; +# endif + + } + + else { + hyperparam[i] = NULL; + } + } + + seq_length = length[index == I_DEFAULT ? 
0 : index]; + contrast = new long double[seq_length]; + normalized_contrast = new long double[seq_length]; + + nb_segmentation_forward = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + nb_segmentation_forward[i] = new double[nb_segment]; + } + + forward = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward[i] = new long double[nb_segment]; + } + + segment_predicted = new long double[seq_length]; + + forward_predicted_entropy = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward_predicted_entropy[i] = new long double[nb_segment]; + } + + norm = new long double[seq_length]; + forward_norm = new long double[seq_length]; + + nb_segmentation_backward = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + nb_segmentation_backward[i] = new double[nb_segment]; + } + + backward = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward[i] = new long double[nb_segment]; + } + + backward_predicted_entropy = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward_predicted_entropy[i] = new long double[nb_segment]; + } + + backward_norm = new long double[seq_length]; + + smoothed = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + smoothed[i] = new double[nb_segment]; + } + + change_point = new long double*[nb_segment]; + for (i = 1;i < nb_segment;i++) { + change_point[i] = new long double[seq_length]; + } + + entropy_smoothed = new long double[nb_segment]; + + // forward recurrence + + for (i = 0;i < seq_length;i++) { + + // computation of segment log-likelihoods + + forward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + // computation of the number of segmentations + + for (j = 0;j < nb_segment;j++) { + nb_segmentation_forward[i][j] = 0; + } + + for (j = 0;j < MIN((i < seq_length - 1 ? 
nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + if (contrast[0] != D_INF) { + nb_segmentation_forward[i][j]++; + } + } + + else { + for (k = i;k >= j;k--) { + if (contrast[k] != D_INF) { + nb_segmentation_forward[i][j] += nb_segmentation_forward[k - 1][j - 1]; + } + } + } + } + + // recurrence and computation of predicted entropies + + if (contrast[i] != D_INF) { + contrast[i] = expl(contrast[i]); + } + else { + contrast[i] = 0.; + } + + segment_norm = 0.; + for (j = i - 1;j >= 0;j--) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + contrast[j] = expl(contrast[j] - segment_norm); + } + else { + contrast[j] = 0.; + } + } + + for (j = 0;j < nb_segment;j++) { + forward[i][j] = 0.; + forward_predicted_entropy[i][j] = 0.; + } + norm[i] = 0.; + + for (j = 0;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j] = contrast[0]; + } + + else { + for (k = i;k >= j;k--) { +// forward[i][j] += contrast[k] * forward[k - 1][j - 1]; + segment_predicted[k] = contrast[k] * forward[k - 1][j - 1]; + forward[i][j] += segment_predicted[k]; + } + + if (forward[i][j] > 0.) { + for (k = i;k >= j;k--) { + lbuff = segment_predicted[k] / forward[i][j]; + if (lbuff > 0.) { + forward_predicted_entropy[i][j] += lbuff * (forward_predicted_entropy[k - 1][j - 1] - logl(lbuff)); + } + } + } + } + + norm[i] += forward[i][j]; + } + + if (norm[i] > 0.) { + for (j = 0;j < MIN((i < seq_length - 1 ? 
nb_segment - 1 : nb_segment) , i + 1);j++) { + forward[i][j] /= norm[i]; + } + + norm[i] = logl(norm[i]); + } + + forward_norm[i] = segment_norm + norm[i]; + } + + // computation of the entropies corresponding to a uniform distribution assumption for the possible segmentations + // for the different numbers of segments + + if (uniform_entropy) { + for (i = 1;i < nb_segment;i++) { + uniform_entropy[i] = log(nb_segmentation_forward[seq_length - 1][i]); + } + } + +# ifdef MESSAGE + cout << "\n"; +// buff = 1.; + for (i = 1;i < nb_segment;i++) { +// buff *= (double)(seq_length - i) / (double)i; + cout << i + 1 << " " << SEQ_label[SEQL_SEGMENTS] << ": " + << nb_segmentation_forward[seq_length - 1][i] + << " (" << nb_segmentation_computation(index , nb_segment , model_type , common_contrast) << ") | " + << log(nb_segmentation_forward[seq_length - 1][i]) << endl; + } +# endif + + // extraction of the log-likelihoods of the observed sequence for the different numbers of segments + + for (i = 0;i < nb_segment;i++) { + if (forward[seq_length - 1][i] > 0.) { + likelihood[i] = logl(forward[seq_length - 1][i]) + forward_norm[seq_length - 1]; + } + else { + likelihood[i] = D_INF; + } + } + + rlikelihood = likelihood[nb_segment - 1]; + + if (rlikelihood != D_INF) { + for (i = 1;i < nb_segment;i++) { + segmentation_entropy[i] = likelihood[i]; + } + + // backward recurrence + + for (i = seq_length - 1;i >= 0;i--) { + + // computation of segment log-likelihoods + + backward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + // computation of the number of possible segmentations + + for (j = 0;j < nb_segment;j++) { + nb_segmentation_backward[i][j] = 0; + } + + for (j = MAX((i == 0 ? 
0 : 1) , nb_segment + i - seq_length);j < nb_segment;j++) { + if (j < nb_segment - 1) { + for (k = i;k <= seq_length + j - nb_segment;k++) { + if (contrast[k] != D_INF) { + nb_segmentation_backward[i][j] += nb_segmentation_backward[k + 1][j + 1]; + } + } + } + + else { + if (contrast[seq_length - 1] != D_INF) { + nb_segmentation_backward[i][j]++; + } + } + } + + // recurrence and computation of predicted entropies + + if (contrast[i] != D_INF) { + normalized_contrast[i] = expl(contrast[i]); + } + else { + normalized_contrast[i] = 0.; + } + + segment_norm = 0.; + for (j = i + 1;j < seq_length;j++) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + normalized_contrast[j] = expl(contrast[j] - segment_norm); + } + else { + normalized_contrast[j] = 0.; + } + } + + for (j = 0;j < nb_segment;j++) { + backward[i][j] = 0.; + backward_predicted_entropy[i][j] = 0.; + smoothed[i][j] = 0.; + } + norm[i] = 0.; + + for (j = MAX((i == 0 ? 0 : 1) , nb_segment + i - seq_length);j < nb_segment;j++) { + if (j < nb_segment - 1) { + for (k = i;k <= seq_length + j - nb_segment;k++) { +// backward[i][j] += normalized_contrast[k] * backward[k + 1][j + 1]; + segment_predicted[k] = normalized_contrast[k] * backward[k + 1][j + 1]; + backward[i][j] += segment_predicted[k]; + } + + if (backward[i][j] > 0.) { + for (k = i;k <= seq_length + j - nb_segment;k++) { + lbuff = segment_predicted[k] / backward[i][j]; + if (lbuff > 0.) { + backward_predicted_entropy[i][j] += lbuff * (backward_predicted_entropy[k + 1][j + 1] - logl(lbuff)); + } + } + } + } + + else { + backward[i][j] = normalized_contrast[seq_length - 1]; + } + + norm[i] += backward[i][j]; + } + + if (norm[i] > 0.) { + for (j = MAX((i == 0 ? 
0 : 1) , nb_segment + i - seq_length);j < nb_segment;j++) { + backward[i][j] /= norm[i]; + } + + norm[i] = logl(norm[i]); + } + + backward_norm[i] = segment_norm + norm[i]; + + // extraction of the smoothed probabilities + + if (i < seq_length - 1) { + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + smoothed[i][j] = smoothed[i + 1][j]; + if (j > 0) { + smoothed[i][j] -= forward[i][j - 1] * backward[i + 1][j] * sequence_norm; + } + if (j < nb_segment - 1) { + smoothed[i][j] += forward[i][j] * backward[i + 1][j + 1] * sequence_norm; + } + + if (smoothed[i][j] < 0.) { + smoothed[i][j] = 0.; + } + if (smoothed[i][j] > 1.) { + smoothed[i][j] = 1.; + } + } + } + + else { + smoothed[i][nb_segment - 1] = 1.; + } + + if (i == 0) { + sequence_norm = expl(backward_norm[i] - rlikelihood); + } + else { + sequence_norm = expl(forward_norm[i - 1] + backward_norm[i] - rlikelihood); + } + + // computation of posterior change-point probabilities for the different numbers of segments + + if (i == 0) { + +# ifdef MESSAGE + lbuff = backward[i][0] * sequence_norm; + if ((lbuff < 1. - DOUBLE_ERROR) || (lbuff > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << lbuff << " | " << 1 << endl; + } +# endif + + for (j = 1;j < nb_segment;j++) { + change_point[j][i] = 1.; + } + } + + else { + for (j = 1;j < nb_segment;j++) { + change_point[j][i] = 0.; + for (k = MAX(1 , j + 1 + i - seq_length);k <= MIN(j , i);k++) { + change_point[j][i] += forward[i - 1][k - 1] * backward[i][k + nb_segment - j - 1]; + } + change_point[j][i] *= expl(forward_norm[i - 1] + backward_norm[i] - likelihood[j]); + } + } + + // computation of the segmentation entropy + + segment_norm = 0.; + for (j = i;j < seq_length;j++) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + normalized_contrast[j] = expl(contrast[j] - segment_norm); + } + else { + normalized_contrast[j] = 0.; + } + } + + // computation of the segmentation entropies for the different numbers of segments + + if (i == 0) { + for (j = i;j < seq_length - 1;j++) { + if (contrast[j] != D_INF) { + lbuff = normalized_contrast[j] * contrast[j]; + for (k = 1;k < MIN(nb_segment , seq_length - j);k++) { + segmentation_entropy[k] -= backward[j + 1][nb_segment - k] * + expl(backward_norm[i] - likelihood[k]) * lbuff; +// segmentation_entropy[k] -= normalized_contrast[j] * backward[j + 1][nb_segment - k] * +// expl(backward_norm[i] - likelihood[k]) * contrast[j]; + } + } + } + } + + else { + for (j = 1;j < nb_segment;j++) { + if (j < nb_segment - 1) { + for (k = i;k < seq_length;k++) { + if (contrast[k] != D_INF) { + lbuff = forward[i - 1][j - 1] * normalized_contrast[k] * contrast[k]; + for (m = j + 1;m < MIN(nb_segment , j + seq_length - k);m++) { + segmentation_entropy[m] -= backward[k + 1][j + nb_segment - m] * + expl(forward_norm[i - 1] + backward_norm[i] - likelihood[m]) * lbuff; +// segmentation_entropy[m] -= forward[i - 1][j - 1] * normalized_contrast[k] * backward[k + 1][j + nb_segment - m] * +// expl(forward_norm[i - 1] + backward_norm[i] - likelihood[m]) * contrast[k]; + } + } + } + } + + else { + if (contrast[seq_length - 1] != D_INF) { + 
lbuff = normalized_contrast[seq_length - 1] * contrast[seq_length - 1]; + for (k = 1;k < MIN(nb_segment , i + 1);k++) { + segmentation_entropy[k] -= forward[i - 1][k - 1] * + expl(forward_norm[i - 1] + backward_norm[i] - likelihood[k]) * lbuff; +// segmentation_entropy[k] -= forward[i - 1][k - 1] * normalized_contrast[seq_length - 1] * +// expl(forward_norm[i - 1] + backward_norm[i] - likelihood[k]) * contrast[seq_length - 1]; + } + } + } + } + } + } + +# ifdef DEBUG + cout << "\n" << SEQ_label[SEQL_SEGMENTATION_ENTROPY] << endl; + for (i = 1;i < nb_segment;i++) { + cout << i + 1 << " " << SEQ_label[SEQL_SEGMENTS] << ": " + << forward_predicted_entropy[seq_length - 1][i] << ", " + << backward_predicted_entropy[0][nb_segment - 1 - i] << ", " + << segmentation_entropy[i] << endl; + } +# endif + +# ifdef MESSAGE + for (i = 1;i < nb_segment;i++) { + if (nb_segmentation_backward[0][nb_segment - 1 - i] != nb_segmentation_forward[seq_length - 1][i]) { + cout << "\nERROR: " << i << " " << nb_segmentation_forward[seq_length - 1][i] + << " | " << nb_segmentation_backward[0][nb_segment - 1 - i] << endl; + } + } +# endif + +# ifdef MESSAGE + for (i = 0;i < seq_length - 1;i++) { + sum = 0.; + for (j = 0;j < nb_segment;j++) { + sum += smoothed[i][j]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " | " << sum << endl; + } + } + + for (i = 1;i < nb_segment;i++) { + sum = 0.; + for (j = 0;j < seq_length;j++) { + sum += change_point[i][j]; + } + if ((sum < i + 1 - DOUBLE_ERROR) || (sum > i + 1 + DOUBLE_ERROR)) { + cout << "\nERROR: " << sum << " | " << i + 1 << endl; + } + } +# endif + + // computation of the ordered change-point entropies and the marginal entropies for + // the different numbers of segments + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < i;j++) { + entropy_smoothed[j] = 0.; + } + entropy_smoothed[i] = 1.; + + first_order_entropy[i] = 0.; + marginal_entropy[i] = 0.; + + for (j = seq_length - 2;j >= 0;j--) { + sequence_norm = expl(forward_norm[j] + backward_norm[j + 1] - likelihood[i]); + +/* for (k = MIN(i , j + 1) + 1;k <= i;k++) { + entropy_smoothed[k] = 0.; + } */ + +// for (k = 0;k <= i;k++) { + for (k = MAX(0 , i + 1 + j - seq_length);k <= MIN(i , j + 1);k++) { + if (k > 0) { +// entropy_smoothed[k] -= forward[j][k - 1] * backward[j + 1][k + nb_segment - i - 1] * sequence_norm; + lbuff = forward[j][k - 1] * backward[j + 1][k + nb_segment - i - 1] * sequence_norm; + entropy_smoothed[k] -= lbuff; + if ((lbuff > 0.) && (lbuff < 1.)) { + first_order_entropy[i] -= lbuff * logl(lbuff); + } + } + if ((entropy_smoothed[k] > 0.) && (entropy_smoothed[k] < 1.)) { + first_order_entropy[i] -= entropy_smoothed[k] * logl(entropy_smoothed[k]); + } + + if (k < i) { + entropy_smoothed[k] += forward[j][k] * backward[j + 1][k + nb_segment - i] * sequence_norm; +/* lbuff = forward[j][k] * backward[j + 1][k + nb_segment - i] * sequence_norm; + entropy_smoothed[k] += lbuff; + if ((lbuff > 0.) && (lbuff < 1.)) { + first_order_entropy[i] -= lbuff * logl(lbuff); + } */ + } + + if (entropy_smoothed[k] < 0.) { + entropy_smoothed[k] = 0.; + } + if (entropy_smoothed[k] > 1.) { + entropy_smoothed[k] = 1.; + } + + if (entropy_smoothed[k] > 0.) 
{ + first_order_entropy[i] += entropy_smoothed[k] * logl(entropy_smoothed[k]); + marginal_entropy[i] -= entropy_smoothed[k] * logl(entropy_smoothed[k]); + } + } + +# ifdef MESSAGE + sum = 0.; + for (k = 0;k <= i;k++) { + sum += entropy_smoothed[k]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. + DOUBLE_ERROR)) { + cout << "\nERROR: " << i + 1 << " " << j << " | " << sum << endl; + } +# endif + + } + } + + // computation of change-point entropies for the different numbers of segments + + for (i = 1;i < nb_segment;i++) { + change_point_entropy[i] = 0.; + for (j = 1;j < seq_length;j++) { + if ((change_point[i][j] > 0.) && (change_point[i][j] < 1.)) { + change_point_entropy[i] -= change_point[i][j] * logl(change_point[i][j]) + + (1 - change_point[i][j]) * logl(1 - change_point[i][j]); + } + } + } + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] factorial[i][j]; + } + delete [] factorial[i]; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] binomial_coeff[i][j]; + } + delete [] binomial_coeff[i]; + } + + delete [] seq_mean[i]; + delete [] hyperparam[i]; + } + delete [] factorial; + delete [] inf_bound_parameter; + delete [] binomial_coeff; + delete [] seq_mean; + delete [] hyperparam; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] contrast; + delete [] normalized_contrast; + + for (i = 0;i < seq_length;i++) { + delete [] nb_segmentation_forward[i]; + } + delete [] nb_segmentation_forward; + + for (i = 0;i < seq_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + delete [] segment_predicted; + + for (i = 0;i < seq_length;i++) { + delete [] forward_predicted_entropy[i]; + } + delete [] forward_predicted_entropy; + + delete [] norm; + delete [] forward_norm; 
+ + for (i = 0;i < seq_length;i++) { + delete [] nb_segmentation_backward[i]; + } + delete [] nb_segmentation_backward; + + for (i = 0;i < seq_length;i++) { + delete [] backward[i]; + } + delete [] backward; + + for (i = 0;i < seq_length;i++) { + delete [] backward_predicted_entropy[i]; + } + delete [] backward_predicted_entropy; + seq_length = length[index == I_DEFAULT ? 0 : index]; + + delete [] backward_norm; + + for (i = 0;i < seq_length;i++) { + delete [] smoothed[i]; + } + delete [] smoothed; + + for (i = 1;i < nb_segment;i++) { + delete [] change_point[i]; + } + delete [] change_point; + + delete [] entropy_smoothed; + + return rlikelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Slope heuristic: date-driven slope estimation (log-likelihood as + * a function of the penalty shape). + * + * \param[in] min_nb_segment minimum number of segments, + * \param[in] max_nb_segment maximum number of segments, + * \param[in] penalty_shape_type penalty shape type, + * \param[in] penalty_shape pointer on the penalty shapes, + * \param[in] likelihood pointer on the log-likelihoods, + * \param[in] intercept references on the intercept, + * \param[in] slope references on the slope, + * \param[in] slope_standard_deviation slope standard deviation, + * \param[in] residual_standard_deviation residual standard deviation. 
+ */
+/*--------------------------------------------------------------*/
+
+void log_likelihood_slope(int min_nb_segment , int max_nb_segment , int penalty_shape_type ,
+                          double *penalty_shape , double *likelihood ,
+                          double &intercept , double &slope , double &slope_standard_deviation ,
+                          double &residual_standard_deviation)
+
+{
+  // Least-squares regression of likelihood[i] on penalty_shape[i] for
+  // i = min_nb_segment,...,max_nb_segment (both arrays are indexed by the number of segments).
+  // intercept, slope, slope_standard_deviation and residual_standard_deviation are outputs:
+  // all four are assigned on every path.
+
+  int i;
+  int nb_model;
+  double diff , likelihood_mean , penalty_shape_mean , penalty_shape_variance , covariance ,
+         residual_mean , residual_square_sum;
+
+
+  nb_model = max_nb_segment - min_nb_segment + 1;
+
+  likelihood_mean = 0.;
+  for (i = min_nb_segment;i <= max_nb_segment;i++) {
+    likelihood_mean += likelihood[i];
+  }
+  likelihood_mean /= nb_model;
+
+  // mean and sum of squared deviations of the regressor
+  // (penalty_shape_variance is a sum of squares: it is not divided by nb_model)
+
+  switch (penalty_shape_type) {
+
+  case 0 : {
+
+    // closed forms for the equally spaced abscissae i - 1
+    // NOTE(review): the covariance and the residuals below still read penalty_shape[i],
+    // so this branch presumably expects penalty_shape[i] == i - 1 -- confirm against the caller
+
+    penalty_shape_mean = (double)(min_nb_segment + max_nb_segment - 2) / 2.;
+
+    // evaluated in double: the integer product nb_model * (nb_model * nb_model - 1)
+    // overflows a 32-bit int once nb_model exceeds about 1290
+    penalty_shape_variance = (double)nb_model * ((double)nb_model * nb_model - 1.) / 12.;
+
+# ifdef DEBUG
+    double buff = 0.;
+    for (i = min_nb_segment;i <= max_nb_segment;i++) {
+      diff = i - 1 - penalty_shape_mean;
+      buff += diff * diff;
+    }
+
+    cout << "TEST: " << penalty_shape_variance << " | " << buff << endl;
+# endif
+
+    break;
+  }
+
+  default : {
+    penalty_shape_mean = 0.;
+    for (i = min_nb_segment;i <= max_nb_segment;i++) {
+      penalty_shape_mean += penalty_shape[i];
+    }
+    penalty_shape_mean /= nb_model;
+
+    penalty_shape_variance = 0.;
+    for (i = min_nb_segment;i <= max_nb_segment;i++) {
+      diff = penalty_shape[i] - penalty_shape_mean;
+      penalty_shape_variance += diff * diff;
+    }
+    break;
+  }
+  }
+
+  covariance = 0.;
+  for (i = min_nb_segment;i <= max_nb_segment;i++) {
+    covariance += (likelihood[i] - likelihood_mean) * (penalty_shape[i] - penalty_shape_mean);
+  }
+
+  // degenerate regressor (a single model or constant penalty shapes): the slope is not
+  // identifiable, a flat line through the mean is returned instead of NaN or infinity
+
+  if (penalty_shape_variance > 0.) {
+    slope = covariance / penalty_shape_variance;
+  }
+  else {
+    slope = 0.;
+  }
+  intercept = likelihood_mean - slope * penalty_shape_mean;
+
+  residual_mean = 0.;
+  residual_square_sum = 0.;
+  for (i = min_nb_segment;i <= max_nb_segment;i++) {
+    diff = likelihood[i] - (intercept + slope * penalty_shape[i]);
+    residual_mean += diff;
+    residual_square_sum += diff * diff;
+  }
+  residual_mean /= nb_model;
+
+  // the slope standard deviation requires at least one residual degree of freedom
+  // (nb_model - 2 > 0); it is set to 0 otherwise so that the output is always defined
+
+  slope_standard_deviation = 0.;
+  if (nb_model > 2) {
+    residual_square_sum /= (nb_model - 2);
+    if (penalty_shape_variance > 0.) {
+      slope_standard_deviation = sqrt(residual_square_sum / penalty_shape_variance);
+    }
+  }
+
+  // for nb_model <= 2, residual_standard_deviation keeps the raw sum of squared
+  // centered residuals (no normalization is possible)
+
+  residual_standard_deviation = 0.;
+  for (i = min_nb_segment;i <= max_nb_segment;i++) {
+    diff = likelihood[i] - (intercept + slope * penalty_shape[i]) - residual_mean;
+    residual_standard_deviation += diff * diff;
+  }
+  if (nb_model > 2) {
+    residual_standard_deviation = sqrt(residual_standard_deviation / (nb_model - 2));
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Slope heuristic: dimension jump method.
+ *
+ * \param[in] os stream for displaying the outputs of the dimension jump method,
+ * \param[in] max_nb_segment maximum number of segments,
+ * \param[in] slope_step step on the slope,
+ * \param[in] penalty_shape pointer on the penalty shapes,
+ * \param[in] likelihood pointer on the log-likelihoods.
+ *
+ * \return optimal slope.
+ */
+/*--------------------------------------------------------------*/
+
+double dimension_jump(ostream *os , int max_nb_segment , double slope_step ,
+                      double *penalty_shape , double *likelihood)
+
+{
+  // Scans the slopes slope_step, 2 * slope_step, ... and, for each slope, selects the number
+  // of segments maximizing 2 * (likelihood[j] - 2 * penalty_shape[j] * slope), j = 1,...,max_nb_segment.
+  // The selected number of segments is a decreasing step function of the slope; the slope at
+  // which the largest drop occurs is returned multiplied by 2, or D_DEFAULT if the largest
+  // drop is smaller than MIN_DIMENSION_JUMP.
+
+  bool first_step;
+  int i , j;
+  int max_diff_nb_segment , nb_segment , previous_nb_segment , nb_step , *step_nb_segment;
+  double slope , optimal_slope , max_likelihood , penalized_likelihood , *begin_slope , *end_slope;
+
+
+  // a step that is not strictly positive (or is NaN) never moves the slope towards MAX_SLOPE:
+  // the scan below would not terminate, so no slope is selected
+
+  if (!(slope_step > 0.)) {
+    return D_DEFAULT;
+  }
+
+  step_nb_segment = new int[max_nb_segment + 1];
+  begin_slope = new double[max_nb_segment + 1];
+  end_slope = new double[max_nb_segment + 1];
+
+  max_diff_nb_segment = 0;
+  optimal_slope = D_DEFAULT;
+
+  // defined fallback when no candidate beats D_INF (e.g. every likelihood is D_INF):
+  // the scan then stops after its first iteration
+  nb_segment = 1;
+  previous_nb_segment = 1;
+
+  first_step = true;
+  slope = slope_step;
+  i = 0;
+
+  do {
+    max_likelihood = D_INF;
+    for (j = 1;j <= max_nb_segment;j++) {
+      penalized_likelihood = 2 * (likelihood[j] - 2 * penalty_shape[j] * slope);
+      if (penalized_likelihood > max_likelihood) {
+        max_likelihood = penalized_likelihood;
+        nb_segment = j;
+      }
+    }
+
+    // explicit flag instead of the floating-point equality test slope == slope_step
+
+    if (first_step) {
+      step_nb_segment[i] = nb_segment;
+      begin_slope[i] = slope;
+      first_step = false;
+    }
+
+    else if (previous_nb_segment > nb_segment) {
+
+      // the selected number of segments drops: the current step is closed and a new one is opened
+
+      end_slope[i] = slope - slope_step;
+      i++;
+      step_nb_segment[i] = nb_segment;
+      begin_slope[i] = slope;
+
+      if (previous_nb_segment - nb_segment > max_diff_nb_segment) {
+        max_diff_nb_segment = previous_nb_segment - nb_segment;
+        optimal_slope = slope;
+      }
+    }
+
+    previous_nb_segment = nb_segment;
+    slope += slope_step;
+  }
+  while ((nb_segment > 1) && (slope <= MAX_SLOPE));
+
+  end_slope[i] = ceil(slope);
+  nb_step = i + 1;
+
+  // optimal_slope holds a slope only if at least one drop was recorded
+
+  if ((max_diff_nb_segment == 0) || (max_diff_nb_segment < MIN_DIMENSION_JUMP)) {
+    optimal_slope = D_DEFAULT;
+  }
+  else {
+    optimal_slope *= 2;
+  }
+
+  // display of the outputs of the dimension jump method
+
+  if (os) {
+    int width[2];
+    ios_base::fmtflags format_flags;
+
+    // left adjustment for the table; the previous adjustment flags are restored afterwards
+    format_flags = os->setf(ios::left , ios::adjustfield);
+
+    width[0] = stat_tool::column_width(max_nb_segment);
+    width[1] = stat_tool::column_width(1 , begin_slope + nb_step - 1);
+
+    *os << "\n" << SEQ_label[SEQL_PIECEWISE_STEP_FUNCTION] << endl;
+    for (i = 0;i < nb_step;i++) {
+      *os << setw(width[0]) << step_nb_segment[i] << " "
+          << setw(width[1]) << begin_slope[i] << " -> "
+          << setw(width[1]) << end_slope[i] << endl;
+    }
+
+    *os << "\n" << SEQ_label[SEQL_DIMENSION_JUMP] << ": " << max_diff_nb_segment;
+    if (optimal_slope > 0.) {
+      *os << " " << SEQ_label[SEQL_OPTIMAL_SLOPE] << ": " << optimal_slope;
+    }
+    *os << endl;
+
+    os->setf(format_flags , ios::adjustfield);
+  }
+
+  delete [] step_nb_segment;
+  delete [] begin_slope;
+  delete [] end_slope;
+
+  return optimal_slope;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Optimal segmentation of a single sequence or a sample of sequences.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] os stream for displaying the results of multiple change-point inference,
+ * \param[in] iidentifier sequence identifier,
+ * \param[in] max_nb_segment maximum number of segments,
+ * \param[in] model_type segment model types,
+ * \param[in] common_contrast flag contrast functions common to the individuals,
+ * \param[in] shape_parameter negative binomial shape parameters,
+ * \param[in] criterion criterion for the selection of the number of segments,
+ * \param[in] min_nb_segment minimum number of segments,
+ * \param[in] penalty_shape_type penalty shape for the slope heuristic,
+ * \param[in] output output (sequence, entropies or Kullback-Leibler divergences).
+ *
+ * \return Sequences object.
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation(StatError &error , ostream *os , int iidentifier , + int max_nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + model_selection_criterion criterion , int min_nb_segment , + int penalty_shape_type , sequence_type output) const + +{ + bool status = true , bayesian; + int i , j; + int index , nb_segment , inb_sequence , *nb_parameter , ilength[4]; + variable_nature itype[1]; + double buff , segmentation_intercept , segmentation_slope , segmentation_slope_standard_deviation , + segmentation_residual_standard_deviation , segmentation_dimension_jump_slope , intercept , + slope , slope_standard_deviation , residual_standard_deviation , dimension_jump_slope , + scaling_factor , max_likelihood[6] , *segmentation_likelihood , *segment_penalty , *penalty_shape , + **penalized_likelihood , *likelihood , *uniform_entropy , *segmentation_divergence , **rank; + long double *segmentation_entropy , *first_order_entropy , *change_point_entropy , *marginal_entropy; + Sequences *iseq , *seq , *oseq; + + + oseq = NULL; + error.init(); + +/* if (((index_param_type == TIME) && (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + if (index_param_type == POSITION) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]); + } */ + + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((output != SEQUENCE) && (output != LOG_LIKELIHOOD_SLOPE)) { + status = false; + ostringstream correction_message; + correction_message << SEQ_label[SEQL_SEQUENCE] << " or " + << STAT_criterion_word[LOG_LIKELIHOOD_SLOPE]; + error.correction_update(SEQ_error[SEQR_FORBIDDEN_OUTPUT] , (correction_message.str()).c_str()); + } + + if (criterion == LIKELIHOOD_SLOPE) { + 
criterion = SEGMENTATION_LIKELIHOOD_SLOPE; + } + else if (criterion == DIMENSION_JUMP) { + criterion = SEGMENTATION_DIMENSION_JUMP; + } + else if (criterion == ICL) { + criterion = mBIC; + } + +/* if ((criterion == LIKELIHOOD_SLOPE) || (criterion == ICL)) { + status = false; + ostringstream correction_message; + correction_message << STAT_criterion_word[SEGMENTATION_LIKELIHOOD_SLOPE] << " or " + << STAT_criterion_word[mBIC]; + error.correction_update(SEQ_error[SEQR_FORBIDDEN_CRITERION] , (correction_message.str()).c_str()); + } */ + } + + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == POISSON_CHANGE) || + (model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == BAYESIAN_POISSON_CHANGE)) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (((model_type[i] != NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 0)) || + ((model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 1))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else if (model_type[i] == CATEGORICAL_CHANGE) { + if 
((marginal_distribution[i]->nb_value < 2) || + (marginal_distribution[i]->nb_value > NB_OUTPUT)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (index_param_type != IMPLICIT_TYPE) && (index_interval->variance > 0.)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + } + + if (iidentifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (iidentifier == identifier[i]) { + index = i; + break; + } + } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + } + + else { + index = I_DEFAULT; + if (length_distribution->variance > 0.) { + status = false; + error.update(SEQ_error[SEQR_VARIABLE_SEQUENCE_LENGTH]); + } + } + + if (status) { + if ((max_nb_segment < 2) || (max_nb_segment > length[index == I_DEFAULT ? 
0 : index] / 2)) { + status = false; + error.update(SEQ_error[SEQR_MAX_NB_SEGMENT]); + } + + if (min_nb_segment == 0) { + min_nb_segment = max_nb_segment / 2; + + if (criterion == LIKELIHOOD_SLOPE) { + criterion = DIMENSION_JUMP; + } + else if (criterion == SEGMENTATION_LIKELIHOOD_SLOPE) { + criterion = SEGMENTATION_DIMENSION_JUMP; + } + } + + else if (min_nb_segment < 2) { + status = false; + error.update(SEQ_error[SEQR_MIN_NB_SEGMENT]); + } + } + + if (status) { + if (max_nb_segment - min_nb_segment < SLOPE_NB_SEGMENT_RANGE) { + if (criterion == LIKELIHOOD_SLOPE) { + criterion = DIMENSION_JUMP; + } + else if (criterion == SEGMENTATION_LIKELIHOOD_SLOPE) { + criterion = SEGMENTATION_DIMENSION_JUMP; + } + + if (output == LOG_LIKELIHOOD_SLOPE) { + output = SEQUENCE; + } + } + + if (index != I_DEFAULT) { + iseq = new Sequences(*this , 1 , &index); + seq = new Sequences(*iseq , ADD_STATE_VARIABLE); + delete iseq; + } + else { + seq = new Sequences(*this , ADD_STATE_VARIABLE); + } + + // rank computation for ordinal variables + + rank = new double*[seq->nb_variable]; + + for (i = 1;i < seq->nb_variable;i++) { + if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + rank[i] = marginal_distribution[i - 1]->rank_computation(); + } + else { + rank[i] = NULL; + } + } + + segmentation_likelihood = new double[max_nb_segment + 2]; + nb_parameter = new int[max_nb_segment + 2]; + + if ((model_type[0] == BAYESIAN_POISSON_CHANGE) || (model_type[0] == BAYESIAN_GAUSSIAN_CHANGE)) { + bayesian = true; +// nb_segment = 2; + + penalized_likelihood = new double*[3]; + penalized_likelihood[2] = new double[max_nb_segment + 1]; + } + + else { + bayesian = false; + + segment_penalty = new double[max_nb_segment + 2]; + + penalized_likelihood = new double*[6]; + for (i = 0;i < 6;i++) { + penalized_likelihood[i] = new double[max_nb_segment + 1]; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + likelihood = new double[max_nb_segment + 1]; + 
segmentation_entropy = new long double[max_nb_segment + 1]; + first_order_entropy = new long double[max_nb_segment + 1]; + change_point_entropy = new long double[max_nb_segment + 1]; + uniform_entropy = new double[max_nb_segment + 1]; + segmentation_divergence = new double[max_nb_segment + 1]; + marginal_entropy = new long double[max_nb_segment + 1]; + } + + if (max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) { + penalty_shape = penalty_shape_computation(index , max_nb_segment , model_type , common_contrast , penalty_shape_type); + } + + seq->segmentation((index == I_DEFAULT ? index : 0) , max_nb_segment , model_type , + common_contrast , shape_parameter , rank , + segmentation_likelihood + 1 , nb_parameter + 1 , + (bayesian ? NULL : segment_penalty + 1)); + + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + log_likelihood_slope(min_nb_segment , max_nb_segment , penalty_shape_type , penalty_shape , + segmentation_likelihood , segmentation_intercept , segmentation_slope , + segmentation_slope_standard_deviation , segmentation_residual_standard_deviation); + } + + if (max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) { + segmentation_dimension_jump_slope = dimension_jump(os , max_nb_segment , SLOPE_STEP , penalty_shape , + segmentation_likelihood); + if ((segmentation_dimension_jump_slope < 0.) && (criterion == SEGMENTATION_DIMENSION_JUMP)) { + criterion = mBIC; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + seq->forward_backward((index == I_DEFAULT ? 
index : 0) , max_nb_segment , model_type , + common_contrast , shape_parameter , rank , + likelihood + 1 , segmentation_entropy + 1 , + first_order_entropy + 1 , change_point_entropy + 1 , + uniform_entropy + 1 , marginal_entropy + 1); + + segmentation_divergence[1] = 0; + for (i = 2;i <= max_nb_segment;i++) { + segmentation_divergence[i] = uniform_entropy[i] - segmentation_entropy[i]; + } + + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + log_likelihood_slope(min_nb_segment , max_nb_segment , penalty_shape_type , + penalty_shape , likelihood , intercept , slope , + slope_standard_deviation , residual_standard_deviation); + +# ifdef MESSAGE + if (penalty_shape_type != 0) { + cout << "\nTEST, " << STAT_criterion_word[LIKELIHOOD_SLOPE] << ": " << slope << " | " + << (likelihood[max_nb_segment] - likelihood[max_nb_segment - 1]) / + (penalty_shape[max_nb_segment] - penalty_shape[max_nb_segment - 1]) << endl; + } +# endif + + } + + if (max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) { + dimension_jump_slope = dimension_jump(os , max_nb_segment , SLOPE_STEP , penalty_shape , likelihood); + if ((dimension_jump_slope < 0.) 
&& (criterion == DIMENSION_JUMP)) { + criterion = ICL; + } + } + } + + if (bayesian) { + if (likelihood[1] != D_INF) { + penalized_likelihood[2][1] = 2 * likelihood[1]; + max_likelihood[2] = penalized_likelihood[2][1]; + nb_segment = 1; + } + + else { + max_nb_segment = 0; + nb_segment = 0; + } + + for (i = 2;i <= max_nb_segment;i++) { + if (likelihood[i] != D_INF) { +// penalized_likelihood[2][i] = 2 * (likelihood[i] - segmentation_entropy[i]); + penalized_likelihood[2][i] = 2 * (likelihood[i] - segmentation_entropy[i] - + uniform_entropy[i]); + + if (penalized_likelihood[2][i] > max_likelihood[2]) { + max_likelihood[2] = penalized_likelihood[2][i]; + nb_segment = i; + } + } + + else { + max_nb_segment = i - 1; + break; + } + } + } + + else { + + // computation of penalized likelihoods corresponding to the slope heuristic (data-driven slope estimation and + // dimension jump), the ICL criterion and the modified BIC (Zhang & Siegmund, 2007) + +/* segmentation_likelihood[1] = seq->one_segment_likelihood((index == I_DEFAULT ? index : 0) , model_type , + common_contrast , shape_parameter , rank); + nb_parameter[1] = seq->nb_parameter_computation((index == I_DEFAULT ? 
index : 0) , 1 , model_type , + common_contrast); */ + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE) && + (likelihood[1] != D_INF)) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { +// penalized_likelihood[0][1] = 2 * (likelihood[1] - 1.5 * penalty_shape[1] * slope); + penalized_likelihood[0][1] = 2 * (likelihood[1] - 2 * penalty_shape[1] * slope); + max_likelihood[0] = penalized_likelihood[0][1]; + } + + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (dimension_jump_slope > 0.)) { + penalized_likelihood[1][1] = 2 * (likelihood[1] - 2 * penalty_shape[1] * dimension_jump_slope); + max_likelihood[1] = penalized_likelihood[1][1]; + } + + penalized_likelihood[2][1] = 2 * likelihood[1] - nb_parameter[1] * + log((double)(seq->nb_sequence * seq->length[0])); + max_likelihood[2] = penalized_likelihood[2][1]; + + nb_segment = 1; + } + + if (segmentation_likelihood[1] != D_INF) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { +// penalized_likelihood[3][1] = 2 * (segmentation_likelihood[1] - 1.5 * penalty_shape[1] * segmentation_slope); + penalized_likelihood[3][1] = 2 * (segmentation_likelihood[1] - 2 * penalty_shape[1] * segmentation_slope); + max_likelihood[3] = penalized_likelihood[3][1]; + } + + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + penalized_likelihood[4][1] = 2 * (segmentation_likelihood[1] - 2 * penalty_shape[1] * segmentation_dimension_jump_slope); + max_likelihood[4] = penalized_likelihood[4][1]; + } + + penalized_likelihood[5][1] = 2 * segmentation_likelihood[1] - nb_parameter[1] * + log((double)(seq->nb_sequence * seq->length[0])) - segment_penalty[1]; + max_likelihood[5] = penalized_likelihood[5][1]; + + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + nb_segment = 1; + } + } + + if (((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE) && + (likelihood[1] == 
D_INF)) || (segmentation_likelihood[1] == D_INF)) { + max_nb_segment = 0; + nb_segment = 0; + } + +/* segmentation_likelihood[2] = seq->segmentation((index == I_DEFAULT ? index : 0) , 2 , model_type , + common_contrast , shape_parameter , rank); + nb_parameter[2] = seq->nb_parameter_computation((index == I_DEFAULT ? index : 0) , 2 , model_type , + common_contrast); */ + + for (i = 2;i <= max_nb_segment;i++) { +/* segmentation_likelihood[i + 1] = seq->segmentation((index == I_DEFAULT ? index : 0) , i + 1 , model_type , + common_contrast , shape_parameter , rank); + nb_parameter[i + 1] = seq->nb_parameter_computation((index == I_DEFAULT ? index : 0) , i + 1 , model_type , + common_contrast); */ + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE) && + (likelihood[i] != D_INF)) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { +// penalized_likelihood[0][i] = 2 * (likelihood[i] - 1.5 * penalty_shape[i] * slope); + penalized_likelihood[0][i] = 2 * (likelihood[i] - 2 * penalty_shape[i] * slope); + if (penalized_likelihood[0][i] > max_likelihood[0]) { + max_likelihood[0] = penalized_likelihood[0][i]; + if (criterion == LIKELIHOOD_SLOPE) { + nb_segment = i; + } + } + } + + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (dimension_jump_slope > 0.)) { + penalized_likelihood[1][i] = 2 * (likelihood[i] - 2 * penalty_shape[i] * dimension_jump_slope); + if (penalized_likelihood[1][i] > max_likelihood[1]) { + max_likelihood[1] = penalized_likelihood[1][i]; + if (criterion == DIMENSION_JUMP) { + nb_segment = i; + } + } + } + + penalized_likelihood[2][i] = 2 * (likelihood[i] - segmentation_entropy[i]) - nb_parameter[i] * + log((double)(seq->nb_sequence * seq->length[0])); + if (penalized_likelihood[2][i] > max_likelihood[2]) { + max_likelihood[2] = penalized_likelihood[2][i]; + if (criterion == ICL) { + nb_segment = i; + } + } + } + + if (segmentation_likelihood[i] != D_INF) { + if (max_nb_segment - min_nb_segment >= 
SLOPE_NB_SEGMENT_RANGE) { +// penalized_likelihood[3][i] = 2 * (segmentation_likelihood[i] - +// 1.5 * penalty_shape[i] * segmentation_slope); + penalized_likelihood[3][i] = 2 * (segmentation_likelihood[i] - + 2 * penalty_shape[i] * segmentation_slope); + if (penalized_likelihood[3][i] > max_likelihood[3]) { + max_likelihood[3] = penalized_likelihood[3][i]; + if (criterion == SEGMENTATION_LIKELIHOOD_SLOPE) { + nb_segment = i; + } + } + } + + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + penalized_likelihood[4][i] = 2 * (segmentation_likelihood[i] - + 2 * penalty_shape[i] * segmentation_dimension_jump_slope); + if (penalized_likelihood[4][i] > max_likelihood[4]) { + max_likelihood[4] = penalized_likelihood[4][i]; + if (criterion == SEGMENTATION_DIMENSION_JUMP) { + nb_segment = i; + } + } + } + + penalized_likelihood[5][i] = 2 * segmentation_likelihood[i] - nb_parameter[i] * + log((double)(seq->nb_sequence * seq->length[0])) - segment_penalty[i]; + if (penalized_likelihood[5][i] > max_likelihood[5]) { + max_likelihood[5] = penalized_likelihood[5][i]; + if (criterion == mBIC) { + nb_segment = i; + } + } + } + + if (((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE) && + (likelihood[i] == D_INF)) || (segmentation_likelihood[i] == D_INF)) { + max_nb_segment = i - 1; + break; + } + } + } + + if (nb_segment > 0) { + if (os) { + int width[23]; + ios_base::fmtflags format_flags; + double norm , *posterior_probability , **weight; + Test *test; + + + format_flags = os->setf(ios::left , ios::adjustfield); + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + posterior_probability = new double[max_nb_segment + 1]; + + likelihood[1] = segmentation_likelihood[1]; + posterior_probability[1] = 1.; + for (i = 2;i <= max_nb_segment;i++) { + posterior_probability[i] = exp(segmentation_likelihood[i] - likelihood[i]); + } + } + + if (bayesian) { + weight = new double*[3]; + 
weight[2] = new double[max_nb_segment + 1]; + } + else { + weight = new double*[6]; + for (i = 0;i < 6;i++) { + weight[i] = new double[max_nb_segment + 1]; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + norm = 0.; + for (i = 1;i <= max_nb_segment;i++) { + weight[2][i] = exp((penalized_likelihood[2][i] - max_likelihood[2]) / 2); + norm += weight[2][i]; + } + for (i = 1;i <= max_nb_segment;i++) { + weight[2][i] /= norm; + } + } + + if (!bayesian) { + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + norm = 0.; + for (i = 1;i <= max_nb_segment;i++) { + weight[0][i] = exp((penalized_likelihood[0][i] - max_likelihood[0]) / 2); + norm += weight[0][i]; + } + for (i = 1;i <= max_nb_segment;i++) { + weight[0][i] /= norm; + } + + test = new Test(STUDENT , false , max_nb_segment - min_nb_segment - 1 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + *os << STAT_criterion_word[LIKELIHOOD_SLOPE] << ": " << slope << " (" + << slope - test->value * slope_standard_deviation << ", " + << slope + test->value * slope_standard_deviation << ") | " + << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " + << residual_standard_deviation << endl; + + delete test; + } + + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (dimension_jump_slope > 0.)) { + norm = 0.; + for (i = 1;i <= max_nb_segment;i++) { + weight[1][i] = exp((penalized_likelihood[1][i] - max_likelihood[1]) / 2); + norm += weight[1][i]; + } + for (i = 1;i <= max_nb_segment;i++) { + weight[1][i] /= norm; + } + } + } + + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + norm = 0.; + for (i = 1;i <= max_nb_segment;i++) { + weight[3][i] = exp((penalized_likelihood[3][i] - max_likelihood[3]) / 2); + norm += weight[3][i]; + } + for (i = 1;i <= max_nb_segment;i++) 
{ + weight[3][i] /= norm; + } + + test = new Test(STUDENT , false , max_nb_segment - min_nb_segment - 1 , I_DEFAULT , D_DEFAULT); + test->critical_probability = ref_critical_probability[0]; + test->t_value_computation(); + + *os << STAT_criterion_word[SEGMENTATION_LIKELIHOOD_SLOPE] << ": " << segmentation_slope << " (" + << segmentation_slope - test->value * segmentation_slope_standard_deviation << ", " + << segmentation_slope + test->value * segmentation_slope_standard_deviation << ") | " + << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": " + << segmentation_residual_standard_deviation << endl; + + delete test; + } + + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + norm = 0.; + for (i = 1;i <= max_nb_segment;i++) { + weight[4][i] = exp((penalized_likelihood[4][i] - max_likelihood[4]) / 2); + norm += weight[4][i]; + } + for (i = 1;i <= max_nb_segment;i++) { + weight[4][i] /= norm; + } + } + + norm = 0.; + for (i = 1;i <= max_nb_segment;i++) { + weight[5][i] = exp((penalized_likelihood[5][i] - max_likelihood[5]) / 2); + norm += weight[5][i]; + } + for (i = 1;i <= max_nb_segment;i++) { + weight[5][i] /= norm; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + width[0] = stat_tool::column_width(max_nb_segment) + ASCII_SPACE; + width[1] = stat_tool::column_width(max_nb_segment , segmentation_likelihood + 1 , 2.) + ASCII_SPACE; + width[2] = stat_tool::column_width(max_nb_segment , likelihood + 1 , 2.) 
+ ASCII_SPACE; + width[3] = stat_tool::column_width(max_nb_segment , posterior_probability + 1) + ASCII_SPACE; + width[4] = column_width(max_nb_segment - 1 , segmentation_entropy + 2) + ASCII_SPACE; + width[5] = column_width(max_nb_segment - 1 , first_order_entropy + 2) + ASCII_SPACE; + width[6] = column_width(max_nb_segment - 1 , change_point_entropy + 2) + ASCII_SPACE; + width[7] = stat_tool::column_width(max_nb_segment - 1 , uniform_entropy + 2) + ASCII_SPACE; + width[8] = stat_tool::column_width(max_nb_segment - 1 , segmentation_divergence + 2) + ASCII_SPACE; +// width[9] = column_width(max_nb_segment - 1 , marginal_entropy + 2) + ASCII_SPACE + width[10] = stat_tool::column_width(nb_parameter[max_nb_segment]) + ASCII_SPACE; + + if (!bayesian) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + width[11] = stat_tool::column_width(max_nb_segment , penalized_likelihood[0] + 1) + ASCII_SPACE; + width[12] = stat_tool::column_width(max_nb_segment , weight[0] + 1) + ASCII_SPACE; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (dimension_jump_slope > 0.)) { + width[13] = stat_tool::column_width(max_nb_segment , penalized_likelihood[1] + 1) + ASCII_SPACE; + width[14] = stat_tool::column_width(max_nb_segment , weight[1] + 1) + ASCII_SPACE; + } + } + + width[15] = stat_tool::column_width(max_nb_segment , penalized_likelihood[2] + 1) + ASCII_SPACE; + width[16] = stat_tool::column_width(max_nb_segment , weight[2] + 1) + ASCII_SPACE; + + if (!bayesian) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + width[17] = stat_tool::column_width(max_nb_segment , penalized_likelihood[3] + 1) + ASCII_SPACE; + width[18] = stat_tool::column_width(max_nb_segment , weight[3] + 1) + ASCII_SPACE; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + width[19] = stat_tool::column_width(max_nb_segment , penalized_likelihood[4] + 1) + ASCII_SPACE; + width[20] = 
stat_tool::column_width(max_nb_segment , weight[4] + 1) + ASCII_SPACE; + } + width[21] = stat_tool::column_width(max_nb_segment , penalized_likelihood[5] + 1) + ASCII_SPACE; + width[22] = stat_tool::column_width(max_nb_segment , weight[5] + 1) + ASCII_SPACE; + } + + *os << "\n" << SEQ_label[SEQL_NB_SEGMENT] << " | 2 * " << STAT_label[STATL_LIKELIHOOD] + << " | 2 * " << SEQ_label[SEQL_POSSIBLE_SEGMENTATION_LIKELIHOOD] + << " | " << SEQ_label[SEQL_POSTERIOR_PROBABILITY] + << " | " << SEQ_label[SEQL_SEGMENTATION_ENTROPY] + << " | " << SEQ_label[SEQL_FIRST_ORDER_ENTROPY] + << " | " << SEQ_label[SEQL_CHANGE_POINT_ENTROPY] + << " | " << SEQ_label[SEQL_UNIFORM_ENTROPY] + << " | " << SEQ_label[SEQL_SEGMENTATION_DIVERGENCE] << endl; +// << " | " << SEQ_label[SEQL_MARGINAL_ENTROPY] + + *os << setw(width[0]) << 1 + << setw(width[1]) << 2 * segmentation_likelihood[1] + << setw(width[2]) << 2 * likelihood[1] + << setw(width[3]) << posterior_probability[1] + << setw(width[4]) << " " + << setw(width[5]) << " " + << setw(width[6]) << " " + << setw(width[7]) << " " + << setw(width[8]) << segmentation_divergence[1] << endl; +// << setw(width[9]) << " " + + for (i = 2;i <= max_nb_segment;i++) { + *os << setw(width[0]) << i + << setw(width[1]) << 2 * segmentation_likelihood[i] + << setw(width[2]) << 2 * likelihood[i] + << setw(width[3]) << posterior_probability[i] + << setw(width[4]) << segmentation_entropy[i] + << setw(width[5]) << first_order_entropy[i] + << setw(width[6]) << change_point_entropy[i] + << setw(width[7]) << uniform_entropy[i] + << setw(width[8]) << segmentation_divergence[i] << endl; +// << setw(width[9]) << marginal_entropy[i] + } + + *os << "\n" << SEQ_label[SEQL_NB_SEGMENT] << " | 2 * " << STAT_label[STATL_LIKELIHOOD] + << " | 2 * " << SEQ_label[SEQL_POSSIBLE_SEGMENTATION_LIKELIHOOD] + << " | " << SEQ_label[SEQL_POSTERIOR_PROBABILITY] + << " | " << SEQ_label[SEQL_SEGMENTATION_DIVERGENCE] + << " | " << STAT_label[STATL_FREE_PARAMETERS]; + + if (!bayesian) { + if 
(max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + *os << " | " << STAT_criterion_word[LIKELIHOOD_SLOPE] << " - " << STAT_label[STATL_WEIGHT]; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (dimension_jump_slope > 0.)) { + *os << " | " << STAT_criterion_word[DIMENSION_JUMP] << " - " << STAT_label[STATL_WEIGHT]; + } + } + + *os << " | " << STAT_criterion_word[ICL] << " - " << STAT_label[STATL_WEIGHT]; + + if (!bayesian) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + *os << " | " << STAT_criterion_word[SEGMENTATION_LIKELIHOOD_SLOPE] << " - " << STAT_label[STATL_WEIGHT]; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + *os << " | " << STAT_criterion_word[SEGMENTATION_DIMENSION_JUMP] << " - " << STAT_label[STATL_WEIGHT]; + } + *os << " | " << STAT_criterion_word[mBIC] << " - " << STAT_label[STATL_WEIGHT]; + } + *os << endl; + + for (i = 1;i <= max_nb_segment;i++) { + *os << setw(width[0]) << i + << setw(width[1]) << 2 * segmentation_likelihood[i] + << setw(width[2]) << 2 * likelihood[i] + << setw(width[3]) << posterior_probability[i] + << setw(width[8]) << segmentation_divergence[i] + << setw(width[10]) << nb_parameter[i]; + + if (!bayesian) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + *os << setw(width[11]) << penalized_likelihood[0][i] + << setw(width[12]) << weight[0][i]; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (dimension_jump_slope > 0.)) { + *os << setw(width[13]) << penalized_likelihood[1][i] + << setw(width[14]) << weight[1][i]; + } + } + + *os << setw(width[15]) << penalized_likelihood[2][i] + << setw(width[16]) << weight[2][i]; + + if (!bayesian) { + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + *os << setw(width[17]) << penalized_likelihood[3][i] + << setw(width[18]) << weight[3][i]; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + *os 
<< setw(width[19]) << penalized_likelihood[4][i] + << setw(width[20]) << weight[4][i]; + } + *os << setw(width[21]) << penalized_likelihood[5][i] + << setw(width[22]) << weight[5][i]; + } + *os << endl; + } + } + + else { + width[0] = stat_tool::column_width(max_nb_segment) + ASCII_SPACE; + width[1] = stat_tool::column_width(max_nb_segment , segmentation_likelihood + 1 , 2.) + ASCII_SPACE; + width[10] = stat_tool::column_width(nb_parameter[max_nb_segment]) + ASCII_SPACE; + + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + width[17] = stat_tool::column_width(max_nb_segment , penalized_likelihood[3] + 1) + ASCII_SPACE; + width[18] = stat_tool::column_width(max_nb_segment , weight[3] + 1) + ASCII_SPACE; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + width[19] = stat_tool::column_width(max_nb_segment , penalized_likelihood[4] + 1) + ASCII_SPACE; + width[20] = stat_tool::column_width(max_nb_segment , weight[4] + 1) + ASCII_SPACE; + } + width[21] = stat_tool::column_width(max_nb_segment , penalized_likelihood[5] + 1) + ASCII_SPACE; + width[22] = stat_tool::column_width(max_nb_segment , weight[5] + 1) + ASCII_SPACE; + + *os << "\n" << SEQ_label[SEQL_NB_SEGMENT] << " | 2 * " << STAT_label[STATL_LIKELIHOOD] + << " | " << STAT_label[STATL_FREE_PARAMETERS]; + if (max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + *os << " | " << STAT_criterion_word[SEGMENTATION_LIKELIHOOD_SLOPE] << " - " << STAT_label[STATL_WEIGHT]; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + *os << " | " << STAT_criterion_word[SEGMENTATION_DIMENSION_JUMP] << " - " << STAT_label[STATL_WEIGHT]; + } + *os << " | " << STAT_criterion_word[mBIC] << " - " << STAT_label[STATL_WEIGHT] << endl; + + for (i = 1;i <= max_nb_segment;i++) { + *os << setw(width[0]) << i + << setw(width[1]) << 2 * segmentation_likelihood[i] + << setw(width[10]) << nb_parameter[i]; + if 
(max_nb_segment - min_nb_segment >= SLOPE_NB_SEGMENT_RANGE) { + *os << setw(width[17]) << penalized_likelihood[3][i] + << setw(width[18]) << weight[3][i]; + } + if ((max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) && (segmentation_dimension_jump_slope > 0.)) { + *os << setw(width[19]) << penalized_likelihood[4][i] + << setw(width[20]) << weight[4][i]; + } + *os << setw(width[21]) << penalized_likelihood[5][i] + << setw(width[22]) << weight[5][i] << endl; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + delete [] posterior_probability; + } + + if (bayesian) { + delete [] weight[2]; + } + else { + for (i = 0;i < 6;i++) { + delete [] weight[i]; + } + } + + delete [] weight; + + os->setf(format_flags , ios::adjustfield); + } + + if (nb_segment == 1) { + seq->one_segment_likelihood((index == I_DEFAULT ? index : 0) , model_type , common_contrast , + shape_parameter , rank); + seq->min_value[0] = 0; + seq->max_value[0] = 0; + seq->build_marginal_frequency_distribution(0); + } + + else { + seq->segmentation((index == I_DEFAULT ? 
index : 0) , nb_segment , model_type , common_contrast , + shape_parameter , rank); + } + + switch (output) { + + case SEQUENCE : { + oseq = seq->segmentation_output(nb_segment , model_type , common_contrast , os); + break; + } + + case SEGMENTATION_ENTROPY : { + for (i = 0;i < 4;i++) { + ilength[i] = max_nb_segment - 1; + } + itype[0] = REAL_VALUE; + + oseq = new Sequences(4 , NULL , ilength , NULL , TIME , 1 , itype); + + for (i = 2;i <= max_nb_segment;i++) { + oseq->index_parameter[0][i - 2] = i; + oseq->real_sequence[0][0][i - 2] = segmentation_entropy[i]; + oseq->index_parameter[1][i - 2] = i; + oseq->real_sequence[1][0][i - 2] = first_order_entropy[i]; + oseq->index_parameter[2][i - 2] = i; + oseq->real_sequence[2][0][i - 2] = change_point_entropy[i]; + oseq->index_parameter[3][i - 2] = i; + oseq->real_sequence[3][0][i - 2] = uniform_entropy[i]; + } + + oseq->build_index_parameter_frequency_distribution(); + oseq->index_interval_computation(); + + oseq->min_value_computation(0); + oseq->max_value_computation(0); + + oseq->build_marginal_histogram(0); + break; + } + + case SEGMENTATION_DIVERGENCE : { + ilength[0] = max_nb_segment; + itype[0] = REAL_VALUE; + + oseq = new Sequences(1 , NULL , ilength , NULL , TIME , 1 , itype); + + for (i = 1;i <= max_nb_segment;i++) { + oseq->index_parameter[0][i - 1] = i; + oseq->real_sequence[0][0][i - 1] = segmentation_divergence[i]; + } + + oseq->build_index_parameter_frequency_distribution(); + oseq->index_interval_computation(); + + oseq->min_value_computation(0); + oseq->max_value_computation(0); + + oseq->build_marginal_histogram(0); + break; + } + + case LOG_LIKELIHOOD_SLOPE : { + +# ifdef DEBUG + cout << "\n"; + for (i = 1;i <= max_nb_segment;i++) { + cout << i << "\t" << penalty_shape[i]; + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + cout << "\t" << likelihood[i] << "\t" << intercept + slope * penalty_shape[i]; + } + else { + cout << "\t" << segmentation_likelihood[i] << "\t" 
<< segmentation_intercept + segmentation_slope * penalty_shape[i]; + } + cout << endl; + } +# endif + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + inb_sequence = 2; + } + else { + inb_sequence = 1; + } + + for (i = 0;i < inb_sequence;i++) { + ilength[i] = max_nb_segment; + } + itype[0] = REAL_VALUE; + itype[1] = AUXILIARY; + + oseq = new Sequences(inb_sequence , NULL , ilength , NULL , TIME , 2 , itype); + + switch (penalty_shape_type) { + case 0 : + scaling_factor = 1; + break; + case 1 : + scaling_factor = PENALTY_SHAPE_SCALING_FACTOR; + break; + case 2 : + scaling_factor = PENALTY_SHAPE_SCALING_FACTOR; + break; + default : + scaling_factor = PENALTY_SHAPE_SCALING_FACTOR; + break; + } + + for (i = 1;i <= max_nb_segment;i++) { + oseq->index_parameter[0][i - 1] = (int)::round(penalty_shape[i] * scaling_factor); + oseq->real_sequence[0][0][i - 1] = segmentation_likelihood[i]; + oseq->real_sequence[0][1][i - 1] = segmentation_intercept + segmentation_slope * penalty_shape[i]; + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + oseq->index_parameter[1][i - 1] = (int)::round(penalty_shape[i] * scaling_factor); + oseq->real_sequence[1][0][i - 1] = likelihood[i]; + oseq->real_sequence[1][1][i - 1] = intercept + slope * penalty_shape[i]; + } + } + + oseq->build_index_parameter_frequency_distribution(); + oseq->index_interval_computation(); + + oseq->min_value_computation(0); + oseq->max_value_computation(0); + oseq->min_value_computation(1); + oseq->max_value_computation(1); + + oseq->build_marginal_histogram(0); + break; + } + } + +# ifdef DEBUG +/* if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + hierarchical_segmentation(error , os , iidentifier , max_nb_segment , model_type); + } */ +# endif + + } + + else { + oseq = NULL; + error.update(SEQ_error[SEQR_SEGMENTATION_FAILURE]); + } + + if (max_nb_segment >= DIMENSION_JUMP_NB_SEGMENT) { + delete [] 
penalty_shape; + } + + for (i = 1;i < seq->nb_variable;i++) { + delete [] rank[i]; + } + delete [] rank; + + delete seq; + + delete [] segmentation_likelihood; + delete [] nb_parameter; + + if (bayesian) { + delete [] penalized_likelihood[2]; + } + + else { + delete [] segment_penalty; + + for (i = 0;i < 6;i++) { + delete [] penalized_likelihood[i]; + } + } + + delete [] penalized_likelihood; + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + delete [] likelihood; + delete [] segmentation_entropy; + delete [] first_order_entropy; + delete [] change_point_entropy; + delete [] uniform_entropy; + delete [] segmentation_divergence; + delete [] marginal_entropy; + } + } + + return oseq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Optimal segmentation of a single sequence or a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the results of multiple change-point inference, + * \param[in] iidentifier sequence identifier, + * \param[in] max_nb_segment maximum number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] criterion criterion for the selection of the number of segments, + * \param[in] min_nb_segment minimum number of segments, + * \param[in] penalty_shape_type penalty shape for the slope heuristic, + * \param[in] output output (sequence, entropies or Kullback-Leibler divergences). + * + * \return Sequences object. 
+ */
+/*--------------------------------------------------------------*/
+
+Sequences* Sequences::segmentation(StatError &error , ostream *os , int iidentifier ,
+                                   int max_nb_segment , vector &model_type ,
+                                   bool common_contrast , vector &shape_parameter ,
+                                   model_selection_criterion criterion , int min_nb_segment ,
+                                   int penalty_shape_type , sequence_type output) const
+
+{
+  // Convenience overload for callers holding vector containers:
+  // the underlying storage of model_type and shape_parameter is exposed
+  // through data() and handed, together with every other argument left
+  // untouched, to the segmentation() overload that accepts raw pointers.
+  // The Sequences object built by that overload is returned as is.
+
+  return this->segmentation(error ,
+                            os ,
+                            iidentifier ,
+                            max_nb_segment ,
+                            model_type.data() ,
+                            common_contrast ,
+                            shape_parameter.data() ,
+                            criterion ,
+                            min_nb_segment ,
+                            penalty_shape_type ,
+                            output);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Writing of segment/state, change-point and entropy profiles for
+ * a single sequence or a sample of sequences (in the case of
+ * multiple change-point models).
+ *
+ * \param[in,out] os stream,
+ * \param[in] index sequence index,
+ * \param[in] nb_segment number of segments/states,
+ * \param[in] profiles pointer on the segment/state profiles,
+ * \param[in] label profile type label,
+ * \param[in] piecewise_function pointer on the piecewise linear functions,
+ * \param[in] change_point pointer on the change-point profiles,
+ * \param[in] segment_length pointer on the segment length distributions,
+ * \param[in] prior_segment_length pointer on the prior segment length distribution assuming a
+ * uniform prior on the possible segmentations,
+ * \param[in] begin_conditonal_entropy pointer on the profiles of entropies conditional on the past,
+ * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future,
+ * \param[in] change_point_entropy pointer on the change-point entropy profiles.
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::profile_ascii_print(ostream &os , int index , int nb_segment , + double **profiles , const char *label , + double **piecewise_function , long double **change_point , + Distribution **segment_length , + Distribution *prior_segment_length , + long double **begin_conditonal_entropy , + long double **end_conditional_entropy , + long double **change_point_entropy) const + +{ + int i , j , k; + int seq_length , start , buff , max_nb_value , *seq_index_parameter , *width; + ios_base::fmtflags format_flags; + + + format_flags = os.flags(ios::adjustfield); + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (i = 0;i < seq_length;i++) { + seq_index_parameter[i] = i; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + + // computation of the column width + + width = new int[2 * nb_variable + 6]; + + start = 0; + if (change_point) { + start++; + } + + for (i = start;i < nb_variable;i++) { + if (type[i] != REAL_VALUE) { + width[i] = stat_tool::column_width((int)min_value[i] , (int)max_value[i]); + } + + else { + if ((index == I_DEFAULT) && (nb_sequence * (nb_variable - 1) <= SEQUENCE_MAX_NB_COLUMN)) { + for (j = 0;j < nb_sequence;j++) { + buff = stat_tool::column_width(length[j] , real_sequence[j][i]); + if (buff > width[i]) { + width[i] = buff; + } + } + } + + else if (index != I_DEFAULT) { + width[i] = stat_tool::column_width(length[index] , real_sequence[index][i]); + } + } + + if ((i > start) || (index == I_DEFAULT)) { + width[i] += ASCII_SPACE; + } + } + + if (index_parameter) { + width[nb_variable] = stat_tool::column_width(index_parameter_distribution->nb_value - 1) + ASCII_SPACE; + } + else { + width[nb_variable] = stat_tool::column_width(seq_length) + ASCII_SPACE; + } + + width[nb_variable + 1] = 0; + for (i = 0;i < seq_length;i++) 
{ + buff = stat_tool::column_width(nb_segment , profiles[i]); + if (buff > width[nb_variable + 1]) { + width[nb_variable + 1] = buff; + } + } + width[nb_variable + 1] += ASCII_SPACE; + + width[nb_variable + 2] = stat_tool::column_width(nb_sequence); + + if (piecewise_function) { + for (i = 1;i < nb_variable;i++) { + if (piecewise_function[i]) { + width[nb_variable + 2 + i] = stat_tool::column_width(seq_length , piecewise_function[i]) + ASCII_SPACE; + } + } + } + + if (change_point) { + width[2 * nb_variable + 2] = 0; + for (i = 1;i < nb_segment;i++) { + buff = column_width(seq_length , change_point[i]); + if (buff > width[2 * nb_variable + 2]) { + width[2 * nb_variable + 2] = buff; + } + } + width[2 * nb_variable + 2] += ASCII_SPACE; + } + + if (segment_length) { + width[2 * nb_variable + 3] = stat_tool::column_width(segment_length[0]->nb_value , segment_length[0]->mass); + for (i = 1;i < nb_segment;i++) { + buff = stat_tool::column_width(segment_length[i]->nb_value , segment_length[i]->mass); + if (buff > width[2 * nb_variable + 3]) { + width[2 * nb_variable + 3] = buff; + } + } + if (prior_segment_length) { + buff = stat_tool::column_width(prior_segment_length->nb_value , prior_segment_length->mass); + if (buff > width[2 * nb_variable + 3]) { + width[2 * nb_variable + 3] = buff; + } + } + width[2 * nb_variable + 3] += ASCII_SPACE; + + width[2 * nb_variable + 4] = stat_tool::column_width(segment_length[0]->nb_value , segment_length[0]->cumul); + for (i = 1;i < nb_segment;i++) { + buff = stat_tool::column_width(segment_length[i]->nb_value , segment_length[i]->cumul); + if (buff > width[2 * nb_variable + 4]) { + width[2 * nb_variable + 4] = buff; + } + } + if (prior_segment_length) { + buff = stat_tool::column_width(prior_segment_length->nb_value , prior_segment_length->cumul); + if (buff > width[2 * nb_variable + 4]) { + width[2 * nb_variable + 4] = buff; + } + } + width[2 * nb_variable + 4] += ASCII_SPACE; + } + + if ((begin_conditonal_entropy) && 
(end_conditional_entropy) && + (change_point_entropy)) { + width[2 * nb_variable + 5] = 0; + + for (i = 1;i < nb_segment;i++) { + buff = column_width(seq_length , begin_conditonal_entropy[i]); + if (buff > width[2 * nb_variable + 5]) { + width[2 * nb_variable + 5] = buff; + } + } + + for (i = 1;i < nb_segment;i++) { + buff = column_width(seq_length , end_conditional_entropy[i]); + if (buff > width[2 * nb_variable + 5]) { + width[2 * nb_variable + 5] = buff; + } + } + + for (i = 1;i < nb_segment;i++) { + buff = column_width(seq_length , change_point_entropy[i]); + if (buff > width[2 * nb_variable + 5]) { + width[2 * nb_variable + 5] = buff; + } + } + + width[2 * nb_variable + 5] += ASCII_SPACE; + } + + if (!change_point) { + os << SEQ_label[SEQL_OPTIMAL] << " " << label << " | "; + } + for (i = 1;i < nb_variable;i++) { + if ((index == I_DEFAULT) && (nb_sequence * (nb_variable - 1) <= SEQUENCE_MAX_NB_COLUMN)) { + for (j = 0;j < nb_sequence;j++) { + os << STAT_label[STATL_VARIABLE] << " " << i << " | "; + } + } + else if (index != I_DEFAULT) { + os << STAT_label[STATL_VARIABLE] << " " << i << " | "; + } + + if ((piecewise_function) && (piecewise_function[i])) { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << " " << i << " | "; + } + } + + switch (index_param_type) { + case TIME : + os << SEQ_label[SEQL_TIME]; + break; + case POSITION : + os << SEQ_label[SEQL_POSITION]; + break; + default : + os << SEQ_label[SEQL_INDEX]; + break; + } + + for (i = 0;i < nb_segment;i++) { + os << " | " << label << " " << i; + } + if (change_point) { + os << " "; + for (i = 1;i < nb_segment;i++) { + os << " | " << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + } + os << endl; + + for (i = 0;i < seq_length;i++) { + os.setf(ios::right , ios::adjustfield); + + if (!change_point) { + os << setw(width[0]) << int_sequence[index == I_DEFAULT ? 
0 : index][0][i]; + } + + for (j = 1;j < nb_variable;j++) { + if ((index == I_DEFAULT) && (nb_sequence * (nb_variable - 1) <= SEQUENCE_MAX_NB_COLUMN)) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + os << setw(width[j]) << int_sequence[k][j][i]; + } + } + else { + for (k = 0;k < nb_sequence;k++) { + os << setw(width[j]) << real_sequence[k][j][i]; + } + } + } + + else if (index != I_DEFAULT) { + if (type[j] != REAL_VALUE) { + os << setw(width[j]) << int_sequence[index][j][i]; + } + else { + os << setw(width[j]) << real_sequence[index][j][i]; + } + } + + if ((piecewise_function) && (piecewise_function[j])) { + os << setw(width[nb_variable + 2 + j]) << piecewise_function[j][i]; + } + } + + os << setw(width[nb_variable]) << seq_index_parameter[i] << " "; + + os.setf(ios::left , ios::adjustfield); + for (j = 0;j < nb_segment;j++) { + os << setw(width[nb_variable + 1]) << profiles[i][j]; + } + + if (change_point) { + os << " "; + for (j = 1;j < nb_segment;j++) { + os << setw(width[2 * nb_variable + 2]) << change_point[j][i]; + } + } + + if (i == 0) { + os.setf(ios::right , ios::adjustfield); + if (index != I_DEFAULT) { + os << setw(width[nb_variable + 2]) << identifier[index]; + } + } + os << endl; + } + + if (segment_length) { + if (prior_segment_length) { + max_nb_value = prior_segment_length->nb_value; + } + + else { + max_nb_value = segment_length[0]->nb_value; + for (i = 1;i < nb_segment;i++) { + if (segment_length[i]->nb_value > max_nb_value) { + max_nb_value = segment_length[i]->nb_value; + } + } + } + + os << "\n\n" << SEQ_label[SEQL_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTIONS] << endl; + + for (i = 0;i < nb_segment;i++) { + os << "\n" << SEQ_label[SEQL_SEGMENT] << " " << i << " " << SEQ_label[SEQL_LENGTH] + << " " << STAT_label[STATL_DISTRIBUTION] << endl; + segment_length[i]->ascii_characteristic_print(os , true); + } + if (prior_segment_length) { + os << "\n" << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << " " << 
STAT_label[STATL_DISTRIBUTION] << endl; + prior_segment_length->ascii_characteristic_print(os , true); + } + + os << "\n "; + for (i = 0;i < nb_segment;i++) { + os << " | " << SEQ_label[SEQL_SEGMENT] << " " << i << " " << SEQ_label[SEQL_LENGTH] + << " " << STAT_label[STATL_DISTRIBUTION]; + } + if (prior_segment_length) { + os << " | " << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTION]; + } + for (i = 0;i < nb_segment;i++) { + os << " | " << SEQ_label[SEQL_SEGMENT] << " " << i << " " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION]; + } + if (prior_segment_length) { + os << " | " << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << " " << i << " " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION]; + } + os << endl; + + for (i = 0;i < max_nb_value;i++) { + os << setw(width[nb_variable]) << i; + + for (j = 0;j < nb_segment;j++) { + if (i < segment_length[j]->nb_value) { + os << setw(width[2 * nb_variable + 3]) << segment_length[j]->mass[i]; + } + else { + os << setw(width[2 * nb_variable + 3]) << " "; + } + } + if (prior_segment_length) { + os << setw(width[2 * nb_variable + 3]) << prior_segment_length->mass[i]; + } + for (j = 0;j < nb_segment;j++) { + if (i < segment_length[j]->nb_value) { + os << setw(width[2 * nb_variable + 4]) << segment_length[j]->cumul[i]; + } + else { + os << setw(width[2 * nb_variable + 4]) << " "; + } + } + if (prior_segment_length) { + os << setw(width[2 * nb_variable + 4]) << prior_segment_length->cumul[i]; + } + os << endl; + } + os << endl; + } + + if ((begin_conditonal_entropy) && (end_conditional_entropy) && + (change_point_entropy)) { + os << "\n" << SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY] << ", " + << SEQ_label[SEQL_END_CONDITIONAL_ENTROPY] << ", " + << SEQ_label[SEQL_CHANGE_POINT_ENTROPY] << endl; + + os << "\n"; + switch (index_param_type) { + case TIME : + os << SEQ_label[SEQL_TIME]; + 
break; + case POSITION : + os << SEQ_label[SEQL_POSITION]; + break; + default : + os << SEQ_label[SEQL_INDEX]; + break; + } + + for (i = 1;i < nb_segment;i++) { + os << " | " << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + os << " "; + for (i = 1;i < nb_segment;i++) { + os << " | " << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + os << " "; + for (i = 1;i < nb_segment;i++) { + os << " | " << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + os << endl; + + buff = width[nb_variable] - ASCII_SPACE; + + for (i = 0;i < seq_length;i++) { + os.setf(ios::right , ios::adjustfield); + os << setw(buff) << seq_index_parameter[i] << " "; + + os.setf(ios::left , ios::adjustfield); + for (j = 1;j < nb_segment;j++) { + os << setw(width[2 * nb_variable + 5]) << begin_conditonal_entropy[j][i]; + } + os << " "; + for (j = 1;j < nb_segment;j++) { + os << setw(width[2 * nb_variable + 5]) << end_conditional_entropy[j][i]; + } + os << " "; + for (j = 1;j < nb_segment;j++) { + os << setw(width[2 * nb_variable + 5]) << change_point_entropy[j][i]; + } + + if (i == 0) { + os.setf(ios::right , ios::adjustfield); + if (index != I_DEFAULT) { + os << setw(width[nb_variable + 2]) << identifier[index]; + } + } + os << endl; + } + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] width; + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of segment/state, change-point and entropy profiles for + * a single sequence or a sample of sequences (in the case of + * multiple change-point models) at the spreadsheet format. 
+ * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] nb_segment number of segments/states, + * \param[in] profiles pointer on the segment/state profiles, + * \param[in] label profile type label, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] piecewise_function pointer on the piecewise linear functions, + * \param[in] change_point pointer on the change-point profiles, + * \param[in] segment_length pointer on the segment length distributions, + * \param[in] prior_segment_length pointer on the prior segment length distribution assuming a + * uniform prior on the possible segmentations, + * \param[in] begin_conditonal_entropy pointer on the profiles of entropies conditional on the past, + * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future, + * \param[in] change_point_entropy pointer on the change-point entropy profiles. + */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::profile_spreadsheet_print(ostream &os , int index , int nb_segment , + double **profiles , const char *label , + bool common_contrast , double ***piecewise_function , + long double **change_point , + Distribution **segment_length , + Distribution *prior_segment_length , + long double **begin_conditonal_entropy , + long double **end_conditional_entropy , + long double **change_point_entropy) const + +{ + int i , j , k; + int seq_length , max_nb_value , *seq_index_parameter; + + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (i = 0;i < seq_length;i++) { + seq_index_parameter[i] = i; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 
0 : index]; + } + + if (!change_point) { + os << SEQ_label[SEQL_OPTIMAL] << " " << label << "\t"; + } + + for (i = 1;i < nb_variable;i++) { +// if ((index == I_DEFAULT) && (nb_sequence * (nb_variable - 1) <= SEQUENCE_MAX_NB_COLUMN)) { + if (index == I_DEFAULT) { + if (common_contrast) { + for (j = 0;j < nb_sequence;j++) { + os << STAT_label[STATL_VARIABLE] << " " << i << "\t"; + } + if ((piecewise_function) && (piecewise_function[i])) { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << " " << i << "\t"; + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + os << STAT_label[STATL_VARIABLE] << " " << i << "\t"; + if ((piecewise_function) && (piecewise_function[i])) { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << " " << i << "\t"; + } + } + } + } + +// else if (index != I_DEFAULT) { + else { + os << STAT_label[STATL_VARIABLE] << " " << i << "\t"; + if ((piecewise_function) && (piecewise_function[i])) { + os << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << " " << i << "\t"; + } + } + } + + switch (index_param_type) { + case TIME : + os << SEQ_label[SEQL_TIME]; + break; + case POSITION : + os << SEQ_label[SEQL_POSITION]; + break; + default : + os << SEQ_label[SEQL_INDEX]; + break; + } + + for (i = 0;i < nb_segment;i++) { + os << "\t" << label << " " << i; + } + if (change_point) { + os << "\t"; + for (i = 1;i < nb_segment;i++) { + os << "\t" << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + } + os << endl; + + for (i = 0;i < seq_length;i++) { + if (!change_point) { + os << int_sequence[index == I_DEFAULT ? 
0 : index][0][i] << "\t"; + } + + for (j = 1;j < nb_variable;j++) { +// if ((index == I_DEFAULT) && (nb_sequence * (nb_variable - 1) <= SEQUENCE_MAX_NB_COLUMN)) { + if (index == I_DEFAULT) { + if (common_contrast) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + os << int_sequence[k][j][i] << "\t"; + } + } + else { + for (k = 0;k < nb_sequence;k++) { + os << real_sequence[k][j][i] << "\t"; + } + } + if ((piecewise_function) && (piecewise_function[j])) { + os << piecewise_function[j][0][i] << "\t"; + } + } + + else { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + os << int_sequence[k][j][i] << "\t"; + if ((piecewise_function) && (piecewise_function[j])) { + os << piecewise_function[j][k][i] << "\t"; + } + } + } + else { + for (k = 0;k < nb_sequence;k++) { + os << real_sequence[k][j][i] << "\t"; + if ((piecewise_function) && (piecewise_function[j])) { + os << piecewise_function[j][k][i] << "\t"; + } + } + } + } + } + +// else if (index != I_DEFAULT) { + else { + if (type[j] != REAL_VALUE) { + os << int_sequence[index][j][i] << "\t"; + } + else { + os << real_sequence[index][j][i] << "\t"; + } + if ((piecewise_function) && (piecewise_function[j])) { + os << piecewise_function[j][index][i] << "\t"; + } + } + } + + os << seq_index_parameter[i]; + for (j = 0;j < nb_segment;j++) { + os << "\t" << profiles[i][j]; + } + + if (change_point) { + os << "\t"; + for (j = 1;j < nb_segment;j++) { + os << "\t" << change_point[j][i]; + } + } + + if ((index != I_DEFAULT) && (i == 0)) { + os << "\t" << identifier[index]; + } + os << endl; + } + + if (segment_length) { + if (prior_segment_length) { + max_nb_value = prior_segment_length->nb_value; + } + + else { + max_nb_value = segment_length[0]->nb_value; + for (i = 1;i < nb_segment;i++) { + if (segment_length[i]->nb_value > max_nb_value) { + max_nb_value = segment_length[i]->nb_value; + } + } + } + + os << "\n\n" << SEQ_label[SEQL_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTIONS] << 
endl; + + for (i = 0;i < nb_segment;i++) { + os << "\n" << SEQ_label[SEQL_SEGMENT] << " " << i << " " << SEQ_label[SEQL_LENGTH] + << " " << STAT_label[STATL_DISTRIBUTION] << endl; + segment_length[i]->spreadsheet_characteristic_print(os , true); + } + if (prior_segment_length) { + os << "\n" << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + prior_segment_length->spreadsheet_characteristic_print(os , true); + } + + os << "\n"; + for (i = 0;i < nb_segment;i++) { + os << "\t" << SEQ_label[SEQL_SEGMENT] << " " << i << " " << SEQ_label[SEQL_LENGTH] + << " " << STAT_label[STATL_DISTRIBUTION]; + } + if (prior_segment_length) { + os << "\t" << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTION]; + } + for (i = 0;i < nb_segment;i++) { + os << "\t" << SEQ_label[SEQL_SEGMENT] << " " << i << " " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION]; + } + if (prior_segment_length) { + os << "\t" << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << " " << i << " " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION]; + } + os << endl; + + for (i = 0;i < max_nb_value;i++) { + os << i; + + for (j = 0;j < nb_segment;j++) { + os << "\t"; + if (i < segment_length[j]->nb_value) { + os << segment_length[j]->mass[i]; + } + } + if (prior_segment_length) { + os << "\t" << prior_segment_length->mass[i]; + } + for (j = 0;j < nb_segment;j++) { + os << "\t"; + if (i < segment_length[j]->nb_value) { + os << segment_length[j]->cumul[i]; + } + } + if (prior_segment_length) { + os << "\t" << prior_segment_length->cumul[i]; + } + os << endl; + } + os << endl; + } + + if ((begin_conditonal_entropy) && (end_conditional_entropy) && + (change_point_entropy)) { + os << "\n" << SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY]; + for (i = 1;i < nb_segment;i++) { + os << "\t"; + } + os << SEQ_label[SEQL_END_CONDITIONAL_ENTROPY]; + for (i = 
1;i < nb_segment;i++) { + os << "\t"; + } + os << SEQ_label[SEQL_CHANGE_POINT_ENTROPY] << endl; + + os << "\n"; + switch (index_param_type) { + case TIME : + os << SEQ_label[SEQL_TIME]; + break; + case POSITION : + os << SEQ_label[SEQL_POSITION]; + break; + default : + os << SEQ_label[SEQL_INDEX]; + break; + } + + for (i = 1;i < nb_segment;i++) { + os << "\t" << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + os << "\t"; + for (i = 1;i < nb_segment;i++) { + os << "\t" << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + os << "\t"; + for (i = 1;i < nb_segment;i++) { + os << "\t" << i + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + } + os << endl; + + for (i = 0;i < seq_length;i++) { + os << seq_index_parameter[i]; + + for (j = 1;j < nb_segment;j++) { + os << "\t" << begin_conditonal_entropy[j][i]; + } + os << "\t"; + for (j = 1;j < nb_segment;j++) { + os << "\t" << end_conditional_entropy[j][i]; + } + os << "\t"; + for (j = 1;j < nb_segment;j++) { + os << "\t" << change_point_entropy[j][i]; + } + + if ((index != I_DEFAULT) && (i == 0)) { + os << "\t" << identifier[index]; + } + os << endl; + } + } + + if ((index != I_DEFAULT) && (index_param_type == IMPLICIT_TYPE)) { + delete [] seq_index_parameter; + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of segment/state, change-point and entropy profiles for + * a single sequence or a sample of sequences (in the case of + * multiple change-point models) at the Gnuplot format. 
+ * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] nb_segment number of segments/states, + * \param[in] profiles pointer on the segment/state profiles, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] piecewise_function pointer on the piecewise linear functions, + * \param[in] change_point pointer on the change-point profiles, + * \param[in] segment_length pointer on the segment length distributions, + * \param[in] prior_segment_length pointer on the prior segment length distribution assuming a + * uniform prior on the possible segmentations, + * \param[in] begin_conditonal_entropy pointer on the profiles of entropies conditional on the past, + * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future, + * \param[in] change_point_entropy pointer on the change-point entropy profiles. + */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::profile_plot_print(ostream &os , int index , int nb_segment , + double **profiles , bool common_contrast , + double ***piecewise_function , long double **change_point , + Distribution **segment_length , + Distribution *prior_segment_length , + long double **begin_conditonal_entropy , + long double **end_conditional_entropy , + long double **change_point_entropy) const + +{ + int i , j , k; + int seq_length , *seq_index_parameter; + + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (i = 0;i < seq_length;i++) { + seq_index_parameter[i] = i; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 
0 : index]; + } + + for (i = 0;i < seq_length;i++) { + os << seq_index_parameter[i] << " "; + + for (j = 1;j < nb_variable;j++) { + if ((piecewise_function) && (piecewise_function[j])) { + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + if ((index == I_DEFAULT) || (index == k)) { + os << int_sequence[k][j][i] << " " << piecewise_function[j][k][i] << " "; + } + } + } + else { + for (k = 0;k < nb_sequence;k++) { + if ((index == I_DEFAULT) || (index == k)) { + os << real_sequence[k][j][i] << " " << piecewise_function[j][k][i] << " "; + } + } + } + } + + else { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + os << int_sequence[k][j][i] << " "; + } + } + else { + for (k = 0;k < nb_sequence;k++) { + os << real_sequence[k][j][i] << " "; + } + } + + os << piecewise_function[j][0][i] << " "; + } + } + } + + for (j = 0;j < nb_segment;j++) { + os << profiles[i][j] << " "; + } + + if (change_point) { + for (j = 1;j < nb_segment;j++) { + os << change_point[j][i] << " "; + } + } + + if (segment_length) { + for (j = 0;j < nb_segment;j++) { + if (i < segment_length[j]->nb_value) { + os << segment_length[j]->mass[i] << " "; + } + else { + os << 0. << " "; + } + } + if (prior_segment_length) { + if (i < prior_segment_length->nb_value) { + os << prior_segment_length->mass[i] << " "; + } + else { + os << 0. << " "; + } + } + +/* for (j = 0;j < nb_segment;j++) { + if (i < segment_length[j]->nb_value) { + os << segment_length[j]->cumul[i] << " "; + } + else { + os << 1. << " "; + } + } + if (prior_segment_length) { + if (i < prior_segment_length->nb_value) { + os << prior_segment_length->cumul[i] << " "; + } + else { + os << 1. 
<< " "; + } + } */ + } + + if ((begin_conditonal_entropy) && (end_conditional_entropy) && + (change_point_entropy)) { + for (j = 1;j < nb_segment;j++) { + os << begin_conditonal_entropy[j][i] << " "; + } + for (j = 1;j < nb_segment;j++) { + os << end_conditional_entropy[j][i] << " "; + } +/* for (j = 1;j < nb_segment;j++) { + os << change_point_entropy[j][i] << " "; + } */ + os << change_point_entropy[nb_segment - 1][i]; + } + os << endl; + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of change-point profiles for a single sequence or + * a sample of sequences (in the case of multiple change-point models) + * at the "plotable" format. + * + * \param[in] plot reference on a MultiPlot object, + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] change_point pointer on the change-point profiles. + */ +/*--------------------------------------------------------------*/ + +void Sequences::change_point_profile_plotable_write(MultiPlot &plot , int index , int nb_segment , + long double **change_point) const + +{ + int i , j , k; + int seq_length , *seq_index_parameter; + + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (i = 0;i < seq_length;i++) { + seq_index_parameter[i] = i; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 
0 : index]; + } + + plot.resize(MAX(nb_segment - 1 , 3)); + i = 0; + + for (j = MAX(1 , nb_segment - 3);j < nb_segment;j++) { + for (k = 0;k < seq_length;k++) { + plot[i].add_point(seq_index_parameter[k] , change_point[j][k]); + } + i++; + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of segment length distributions including the prior segment length + * distribution corresponding to a uniform prior on the possible segmentations + * at the "plotable" format. + * + * \param[in] plot reference on a MultiPlot object, + * \param[in] nb_segment number of segments, + * \param[in] segment_length_max maximum probability of segment length distribution, + * \param[in] segment_length pointer on the segment length distributions, + * \param[in] prior_segment_length pointer on the prior segment length distribution. + */ +/*--------------------------------------------------------------*/ + +void Sequences::segment_length_distribution_plotable_write(MultiPlot &plot , int nb_segment , + double segment_length_max , + Distribution **segment_length , + Distribution *prior_segment_length) const + +{ + int i; + int max_nb_value; + + + if (prior_segment_length) { + max_nb_value = prior_segment_length->nb_value; + + plot.resize(nb_segment + 1); + } + + else { + max_nb_value = segment_length[0]->nb_value; + for (i = 1;i < nb_segment;i++) { + if (segment_length[i]->nb_value > max_nb_value) { + max_nb_value = segment_length[i]->nb_value; + } + } + + plot.resize(nb_segment); + } + + plot.xrange = Range(0 , max_nb_value - 1); + if (max_nb_value - 1 < TIC_THRESHOLD) { + plot.xtics = 1; + } + + plot.yrange = Range(0 , segment_length_max * YSCALE); + + for (i = 0;i < nb_segment;i++) { + segment_length[i]->plotable_mass_write(plot[i]); + } + if (prior_segment_length) { + prior_segment_length->plotable_mass_write(plot[nb_segment]); + } +} + + 
+/*--------------------------------------------------------------*/ +/** + * \brief Writing of entropy profiles for a single sequence or + * a sample of sequences (in the case of multiple change-point models) + * at the "plotable" format. + * + * \param[in] plot reference on a MultiPlot object, + * \param[in] index sequence index, + * \param[in] begin_conditonal_entropy pointer on the profiles of entropies conditional on the past, + * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future, + * \param[in] change_point_entropy pointer on the change-point entropy profiles. + */ +/*--------------------------------------------------------------*/ + +void Sequences::entropy_profile_plotable_write(MultiPlot &plot , int index , + long double *begin_conditional_entropy , + long double *end_conditional_entropy , + long double *change_point_entropy) const + +{ + int i; + int seq_length , *seq_index_parameter; + + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (i = 0;i < seq_length;i++) { + seq_index_parameter[i] = i; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + + plot.resize(3); + + for (i = 0;i < seq_length;i++) { + plot[0].add_point(seq_index_parameter[i] , begin_conditional_entropy[i]); + plot[1].add_point(seq_index_parameter[i] , end_conditional_entropy[i]); + plot[2].add_point(seq_index_parameter[i] , change_point_entropy[i]); + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation by summation of segments/change-point profiles for + * a single sequence or a sample of sequences and of entropy profiles. 
+ * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] rank ranks (for ordinal variables), + * \param[in] os stream, + * \param[in] plot_set pointer on a MultiPlotSet object, + * \param[in] segment_length_max maximum probability of segment length distribution, + * \param[in] output output type, + * \param[in] format output format (ASCII/SPREADSHEET/GNUPLOT/PLOT). + * + * \return log-likelihood of the multiple change-point model. + */ +/*--------------------------------------------------------------*/ + +double Sequences::forward_backward(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + double **rank , ostream *os , MultiPlotSet *plot_set , + double &segment_length_max , change_point_profile output , + output_format format) const + +{ + int i , j , k , m; + int seq_length , inf_bound , *inf_bound_parameter , *seq_index_parameter; + double sum , buff , rlikelihood , *likelihood , **seq_mean , **hyperparam , **backward_output , + ***factorial , ***binomial_coeff , ***smoothed; + long double segment_norm , sequence_norm , lbuff , lsum , segmentation_entropy , first_order_entropy , + change_point_entropy_sum , marginal_entropy , *contrast , *normalized_contrast , + *norm , *forward_norm , *backward_norm , *entropy_smoothed , *segment_predicted , + **forward , **backward , **change_point , **forward_predicted_entropy , + **backward_predicted_entropy , **forward_partial_entropy , **backward_partial_entropy , + **change_point_entropy , ***state_entropy; + Distribution **segment_length; + DiscreteParametric *prior_segment_length; + +# ifdef DEBUG + long double *entropy_norm; +# endif + + + factorial = new double**[nb_variable]; + inf_bound_parameter = new int[nb_variable]; + 
binomial_coeff = new double**[nb_variable]; + seq_mean = new double*[nb_variable]; + seq_index_parameter = NULL; + hyperparam = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + + // computation of log of factorials for Poisson models + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + factorial[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + factorial[i][j][k] = log_factorial(int_sequence[j][i][k]); + } + } + else { + factorial[i][j] = NULL; + } + } + } + + else { + factorial[i] = NULL; + } + + // computation of log of binomial coefficients for negative binomial models + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + + binomial_coeff[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + binomial_coeff[i][j][k] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + } + else { + binomial_coeff[i][j] = NULL; + } + } + } + + else { + binomial_coeff[i] = NULL; + } + + // computation of sequence means for Gaussian change in the variance models or + // stationary piecewise autoregressive models + + if ((model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + 
seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + if (((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (output == CHANGE_POINT)) && (!seq_index_parameter)) { + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 
0 : index]; + } + } + + // computation of hyperparameters for Bayesian Poisson and Gaussian models + + if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + hyperparam[i] = new double[2]; + gamma_hyperparameter_computation(index , i , hyperparam[i]); + +# ifdef MESSAGE + cout << "\nGamma hyperparameters: " << hyperparam[i][0] << " " << hyperparam[i][1] << endl; +# endif + + } + + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + hyperparam[i] = new double[4]; + gaussian_gamma_hyperparameter_computation(index , i , hyperparam[i]); + +# ifdef MESSAGE + cout << "\nGaussian gamma hyperparameters: " << hyperparam[i][0] << " " << hyperparam[i][1] + << " " << hyperparam[i][2] << " " << hyperparam[i][3] << endl; +# endif + + } + + else { + hyperparam[i] = NULL; + } + } + + seq_length = length[index == I_DEFAULT ? 0 : index]; + contrast = new long double[seq_length]; + normalized_contrast = new long double[seq_length]; + + forward = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward[i] = new long double[nb_segment]; + } + + segment_predicted = new long double[seq_length]; + + forward_predicted_entropy = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward_predicted_entropy[i] = new long double[nb_segment]; + } + + norm = new long double[seq_length]; + forward_norm = new long double[seq_length]; +// entropy_norm = new long double[seq_length]; + + likelihood = new double[nb_segment]; + + backward = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward[i] = new long double[nb_segment]; + } + + backward_predicted_entropy = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward_predicted_entropy[i] = new long double[nb_segment]; + } + + backward_norm = new long double[seq_length]; + + smoothed = new double**[nb_segment]; + for (i = 1;i < nb_segment;i++) { + smoothed[i] = new double*[seq_length]; + for (j = 0;j < seq_length;j++) { + smoothed[i][j] = new double[nb_segment]; + } + } + 
+ backward_output = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward_output[i] = new double[nb_segment]; + } + + segment_length = new Distribution*[nb_segment]; + for (i = 0;i < nb_segment;i++) { + segment_length[i] = new Distribution(seq_length); + for (j = 0;j < seq_length;j++) { + segment_length[i]->mass[j] = 0.; + } + } + + change_point = new long double*[nb_segment]; + for (i = 1;i < nb_segment;i++) { + change_point[i] = new long double[seq_length]; + } + + entropy_smoothed = new long double[nb_segment]; + + state_entropy = new long double**[nb_segment]; + for (i = 1;i < nb_segment;i++) { + state_entropy[i] = new long double*[seq_length]; + for (j = 0;j < seq_length;j++) { + state_entropy[i][j] = new long double[nb_segment]; + } + } + + forward_partial_entropy = new long double*[nb_segment]; + for (i = 1;i < nb_segment;i++) { + forward_partial_entropy[i] = new long double[seq_length]; + } + + backward_partial_entropy = new long double*[nb_segment]; + for (i = 1;i < nb_segment;i++) { + backward_partial_entropy[i] = new long double[seq_length]; + } + + change_point_entropy = new long double*[nb_segment]; + for (i = 1;i < nb_segment;i++) { + change_point_entropy[i] = new long double[seq_length]; + } + + // forward recurrence + + for (i = 0;i < seq_length;i++) { + + // computation of segment log-likelihoods + + forward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + // recurrence and computation of predicted entropies + + if (contrast[i] != D_INF) { + contrast[i] = expl(contrast[i]); + } + else { + contrast[i] = 0.; + } + + segment_norm = 0.; + for (j = i - 1;j >= 0;j--) { + segment_norm += norm[j]; + +# ifdef DEBUG + if (i == seq_length - 1) { + cout << j << ": " << contrast[j] << " " << segment_norm << " | "; + } +# endif + + if (contrast[j] != D_INF) { + contrast[j] = expl(contrast[j] - segment_norm); + } + else { + 
contrast[j] = 0.; + } + +# ifdef DEBUG + if (i == seq_length - 1) { + cout << contrast[j]; + if (j > 0) { + cout << " " << forward[j - 1][nb_segment - 2] << " | " + << contrast[j] * forward[j - 1][nb_segment - 2]; + } + cout << endl; + } +# endif + + } + +# ifdef DEBUG + if (i == seq_length - 1) { + cout << endl; + } +# endif + + for (j = 0;j < nb_segment;j++) { + forward[i][j] = 0.; + forward_predicted_entropy[i][j] = 0.; + } + norm[i] = 0.; + +// for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + for (j = 0;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j] = contrast[0]; + } + + else { + for (k = i;k >= j;k--) { + segment_predicted[k] = contrast[k] * forward[k - 1][j - 1]; + forward[i][j] += segment_predicted[k]; + } + + if (forward[i][j] > 0.) { + for (k = i;k >= j;k--) { + lbuff = segment_predicted[k] / forward[i][j]; + if (lbuff > 0.) { + forward_predicted_entropy[i][j] += lbuff * (forward_predicted_entropy[k - 1][j - 1] - logl(lbuff)); + } + } + } + } + + norm[i] += forward[i][j]; + } + + if (norm[i] > 0.) { +// for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + for (j = 0;j < MIN((i < seq_length - 1 ? 
nb_segment - 1 : nb_segment) , i + 1);j++) { + forward[i][j] /= norm[i]; + } + + norm[i] = logl(norm[i]); + } +// entropy_norm[i] = norm[i]; + + forward_norm[i] = segment_norm + norm[i]; + } + +# ifdef DEBUG + cout << "\n"; + for (i = 0;i < seq_length;i++) { + cout << i << " |"; + lsum = 0.; + for (j = 0;j < nb_segment;j++) { + lsum += forward[i][j]; + cout << " " << forward[i][j]; + } + cout << " | " << lsum << ", " << expl(norm[i]) << endl; + } +# endif + +# ifdef DEBUG + cout << "\n"; + for (i = 0;i < seq_length;i++) { + cout << i << " |"; + for (j = 0;j < nb_segment;j++) { + cout << " " << forward_predicted_entropy[i][j]; + } + cout << endl; + } +# endif + + // extraction of the log-likelihoods of the observed sequence for the different numbers of segments + + for (i = 0;i < nb_segment;i++) { + if (forward[seq_length - 1][i] > 0.) { + likelihood[i] = logl(forward[seq_length - 1][i]) + forward_norm[seq_length - 1]; + } + else { + likelihood[i] = D_INF; + } + } + + rlikelihood = likelihood[nb_segment - 1]; + + if (rlikelihood != D_INF) { + +# ifdef MESSAGE + segmentation_entropy = rlikelihood; +# endif + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length;j++) { + for (k = 0;k < nb_segment;k++) { + state_entropy[i][j][k] = 0.; + } + } + } + + // backward recurrence + + for (i = seq_length - 1;i >= 0;i--) { + + // computation of segment log-likelihoods + + backward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + // recurrence and computation of predicted entropies + + if (contrast[i] != D_INF) { + normalized_contrast[i] = expl(contrast[i]); + } + else { + normalized_contrast[i] = 0.; + } + + segment_norm = 0.; + for (j = i + 1;j < seq_length;j++) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + normalized_contrast[j] = expl(contrast[j] - segment_norm); + } + else { + normalized_contrast[j] = 0.; + } + } + + for (j = 0;j 
< nb_segment;j++) { + backward[i][j] = 0.; + backward_predicted_entropy[i][j] = 0.; + backward_output[i][j] = 0.; + + for (k = 1;k < nb_segment;k++) { + smoothed[k][i][j] = 0.; + } + } + norm[i] = 0.; + +// for (j = MAX((i == 0 ? 0 : 1) , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + for (j = MAX((i == 0 ? 0 : 1) , nb_segment + i - seq_length);j < nb_segment;j++) { + if (j < nb_segment - 1) { + for (k = i;k <= seq_length + j - nb_segment;k++) { + segment_predicted[k] = normalized_contrast[k] * backward[k + 1][j + 1]; + backward[i][j] += segment_predicted[k]; + } + + if (backward[i][j] > 0.) { + for (k = i;k <= seq_length + j - nb_segment;k++) { + lbuff = segment_predicted[k] / backward[i][j]; + if (lbuff > 0.) { + backward_predicted_entropy[i][j] += lbuff * (backward_predicted_entropy[k + 1][j + 1] - logl(lbuff)); + } + } + } + } + + else { + backward[i][j] = normalized_contrast[seq_length - 1]; + } + + norm[i] += backward[i][j]; + } + + if (norm[i] > 0.) { +// for (j = MAX((i == 0 ? 0 : 1) , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + for (j = MAX((i == 0 ? 0 : 1) , nb_segment + i - seq_length);j < nb_segment;j++) { + backward[i][j] /= norm[i]; + } + + norm[i] = logl(norm[i]); + } + + backward_norm[i] = segment_norm + norm[i]; + + // extraction of the smoothed probabilities for the different numbers of segments + + if (i < seq_length - 1) { + for (j = 1;j < nb_segment;j++) { + sequence_norm = expl(forward_norm[i] + backward_norm[i + 1] - likelihood[j]); + + for (k = MAX(0 , j + i - seq_length - 1);k <= MIN(j , i);k++) { + smoothed[j][i][k] = smoothed[j][i + 1][k]; + if (k > 0) { + smoothed[j][i][k] -= forward[i][k - 1] * backward[i + 1][k + nb_segment - j - 1] * + sequence_norm; + } + if (k < j) { + smoothed[j][i][k] += forward[i][k] * backward[i + 1][k + nb_segment - j] * + sequence_norm; + } + + if (smoothed[j][i][k] < 0.) { + smoothed[j][i][k] = 0.; + } + if (smoothed[j][i][k] > 1.) 
{ + smoothed[j][i][k] = 1.; + } + } + } + } + + else { + for (j = 1;j < nb_segment;j++) { + smoothed[j][i][j] = 1.; + } + } + + if (i == 0) { + sequence_norm = expl(backward_norm[i] - rlikelihood); + } + else { + sequence_norm = expl(forward_norm[i - 1] + backward_norm[i] - rlikelihood); + +# ifdef DEBUG + cout << i << ": " << forward_norm[i - 1] << " " << backward_norm[i] << " | " + << forward_norm[i - 1] + backward_norm[i] - rlikelihood << " " << sequence_norm << endl; +# endif + + } + + if (output == SEGMENT) { + for (j = 0;j < nb_segment;j++) { + backward_output[i][j] = smoothed[nb_segment - 1][i][j]; + } + } + + // computation of posterior change-point probabilities for the different numbers of segments + + if (i == 0) { + +# ifdef MESSAGE + lbuff = backward[i][0] * sequence_norm; + if ((lbuff < 1. - DOUBLE_ERROR) || (lbuff > 1. + DOUBLE_ERROR)) { + cout << "\nERROR: " << lbuff << " | " << 1 << endl; + } +# endif + + if (output == CHANGE_POINT) { + backward_output[i][0] = 1.; + } + for (j = 1;j < nb_segment;j++) { + change_point[j][i] = 1.; + } + } + + else { + change_point[nb_segment - 1][i] = 0.; + for (j = MAX(1 , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + if (output == CHANGE_POINT) { + backward_output[i][j] = forward[i - 1][j - 1] * backward[i][j] * sequence_norm; + } + change_point[nb_segment - 1][i] += forward[i - 1][j - 1] * backward[i][j]; + } + change_point[nb_segment - 1][i] *= sequence_norm; + + for (j = 1;j < nb_segment - 1;j++) { + change_point[j][i] = 0.; + for (k = MAX(1 , j + 1 + i - seq_length);k <= MIN(j , i);k++) { + change_point[j][i] += forward[i - 1][k - 1] * backward[i][k + nb_segment - j - 1]; + } + change_point[j][i] *= expl(forward_norm[i - 1] + backward_norm[i] - likelihood[j]); + } + } + + segment_norm = 0.; + for (j = i;j < seq_length;j++) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + normalized_contrast[j] = expl(contrast[j] - segment_norm); + } + else { + normalized_contrast[j] = 0.; + } + } 
+ + // extraction of segment length distributions + + if (i == 0) { + for (j = i;j <= seq_length - nb_segment;j++) { + segment_length[0]->mass[j + 1] = normalized_contrast[j] * backward[j + 1][1] * sequence_norm; + } + } + + else { + for (j = MAX(1 , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + if (j < nb_segment - 1) { + if (backward[i][j] > 0.) { + for (k = i;k <= seq_length + j - nb_segment;k++) { + segment_length[j]->mass[k - i + 1] += forward[i - 1][j - 1] * normalized_contrast[k] * + backward[k + 1][j + 1] * sequence_norm; + + } + } + } + + else { +// segment_length[j]->mass[seq_length - i] = forward[i - 1][j - 1] * backward[i][j] * sequence_norm; + segment_length[j]->mass[seq_length - i] = forward[i - 1][j - 1] * normalized_contrast[seq_length - 1] * + sequence_norm; + } + } + } + + // computation of partial entropies for the different numbers of segments + + if (i > 0) { + for (j = 1;j < nb_segment;j++) { + sequence_norm = expl(forward_norm[i - 1] + backward_norm[i] - likelihood[j]); + + for (k = MAX(1 , j + 1 + i - seq_length);k <= MIN(j , i);k++) { + if (k < j) { + lsum = 0.; + for (m = seq_length + k - nb_segment;m >= i;m--) { + lsum += normalized_contrast[m] * backward[m + 1][k + nb_segment - j]; + if (smoothed[j][m][k] > 0.) { + lbuff = forward[i - 1][k - 1] * lsum * sequence_norm / smoothed[j][m][k]; + if (lbuff > 0.) { + state_entropy[j][m][k] += lbuff * (forward_predicted_entropy[i - 1][k - 1] - logl(lbuff)); + } + } + } + } + + else { + lsum = forward[i - 1][k - 1] * normalized_contrast[seq_length - 1] * + sequence_norm; + for (m = seq_length - 1;m >= i;m--) { + if (smoothed[j][m][k] > 0.) { + lbuff = lsum / smoothed[j][m][k]; + if (lbuff > 0.) 
{ + state_entropy[j][m][k] += lbuff * (forward_predicted_entropy[i - 1][k - 1] - logl(lbuff)); + } + } + } + } + } + } + } + + // computation of the segmentation entropy + +# ifdef MESSAGE + if (i == 0) { + for (j = i;j <= seq_length - nb_segment;j++) { + if (contrast[j] != D_INF) { + segmentation_entropy -= normalized_contrast[j] * backward[j + 1][1] * + sequence_norm * contrast[j]; + } + } + } + + else { + for (j = MAX(1 , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + if (j < nb_segment - 1) { + for (k = i;k <= seq_length + j - nb_segment;k++) { + if (contrast[k] != D_INF) { + segmentation_entropy -= forward[i - 1][j - 1] * normalized_contrast[k] * backward[k + 1][j + 1] * + sequence_norm * contrast[k]; + } + } + } + + else { + if (contrast[seq_length - 1] != D_INF) { + segmentation_entropy -= forward[i - 1][j - 1] * normalized_contrast[seq_length - 1] * + sequence_norm * contrast[seq_length - 1]; + } + } + } + } +# endif + + } + +// segmentation_entropy = forward_predicted_entropy[seq_length - 1][nb_segment - 1]; +// segmentation_entropy = backward_predicted_entropy[0][0]; + +# ifdef DEBUG + cout << "\n"; +// for (i = seq_length - 1;i >= 0;i--) { + for (i = 0;i < seq_length;i++) { + cout << i << " |"; + lsum = 0.; + for (j = 0;j < nb_segment;j++) { + lsum += backward[i][j]; + cout << " " << backward[i][j]; + } + cout << " | " << lsum << ", " << expl(norm[i]) << endl; + } +# endif + +# ifdef DEBUG + cout << "\n"; +// for (i = seq_length - 1;i >= 0;i--) { + for (i = 0;i < seq_length;i++) { + cout << i << " |"; + for (j = 0;j < nb_segment;j++) { + cout << " " << backward_predicted_entropy[i][j]; + } + cout << endl; + } +# endif + +# ifdef DEBUG + for (i = 1;i < nb_segment;i++) { + cout << "\n" << i + 1 << " " << SEQ_label[SEQL_SEGMENTS] << endl; + for (j = 0;j < seq_length;j++) { + cout << j << " |"; + for (k = 0;k < nb_segment;k++) { + cout << " " << state_entropy[i][j][k]; + } + cout << endl; + } + } +# endif + +# ifdef MESSAGE + for (i = 1;i 
< nb_segment;i++) { + for (j = 0;j < seq_length - 1;j++) { + sum = 0.; + for (k = 0;k < nb_segment;k++) { + sum += smoothed[i][j][k]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. + DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " | " << sum << endl; + } + } + } + + for (i = 1;i < nb_segment;i++) { + sum = 0.; + for (j = 0;j < seq_length;j++) { + sum += change_point[i][j]; + } + if ((sum < i + 1 - DOUBLE_ERROR) || (sum > i + 1 + DOUBLE_ERROR)) { + cout << "\nERROR: " << sum << " | " << i + 1 << endl; + } + } +# endif + + for (i = 0;i < nb_segment;i++) { + segment_length[i]->nb_value_computation(); + segment_length[i]->offset_computation(); + segment_length[i]->cumul_computation(); + segment_length[i]->max_computation(); + segment_length[i]->mean_computation(); + segment_length[i]->variance_computation(); + +# ifdef DEBUG + cout << "\n" << SEQ_label[SEQL_SEGMENT] << " " << i << " " << SEQ_label[SEQL_LENGTH] << " " + << STAT_label[STATL_DISTRIBUTION] << " "; + segment_length[i]->ascii_characteristic_print(cout); + cout << "\n"; + segment_length[i]->ascii_print(cout , false , true , false); +# endif + + } + + inf_bound = prior_segment_length_inf_bound_computation(index , nb_segment , model_type , common_contrast); + prior_segment_length = new DiscreteParametric(inf_bound , nb_segment , seq_length); + + segment_length_max = prior_segment_length->max; + for (i = 0;i < nb_segment;i++) { + if (segment_length[i]->max > segment_length_max) { + segment_length_max = segment_length[i]->max; + } + } + + // extraction of partial entropies for the different numbers of segments + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length;j++) { + forward_partial_entropy[i][j] = 0.; + for (k = 0;k < nb_segment;k++) { + if (state_entropy[i][j][k] < 0.) { + state_entropy[i][j][k] = 0.; + } + if (smoothed[i][j][k] > 0.) { + forward_partial_entropy[i][j] += smoothed[i][j][k] * (state_entropy[i][j][k] - + log(smoothed[i][j][k])); + } + } + if (forward_partial_entropy[i][j] < 0.) 
{ + forward_partial_entropy[i][j] = 0.; + } + } + } + + // computation of ordered change-point entropy and of marginal entropy + + for (i = 0;i < nb_segment - 1;i++) { + entropy_smoothed[i] = 0.; + } + entropy_smoothed[nb_segment - 1] = 1.; + + first_order_entropy = 0.; + marginal_entropy = 0.; + + for (i = seq_length - 2;i >= 0;i--) { + sequence_norm = expl(forward_norm[i] + backward_norm[i + 1] - rlikelihood); + +/* for (j = MIN(nb_segment - 1 , i + 1) + 1;j < nb_segment;j++) { + entropy_smoothed[j] = 0.; + } */ + +// for (j = 0;j < nb_segment;j++) { + for (j = MAX(0 , nb_segment + i - seq_length);j <= MIN(nb_segment - 1 , i + 1);j++) { + if (j > 0) { +// entropy_smoothed[j] -= forward[i][j - 1] * backward[i + 1][j] * sequence_norm; + lbuff = forward[i][j - 1] * backward[i + 1][j] * sequence_norm; + entropy_smoothed[j] -= lbuff; + if ((lbuff > 0.) && (lbuff < 1.)) { + first_order_entropy -= lbuff * logl(lbuff); + } + } + if ((entropy_smoothed[j] > 0.) && (entropy_smoothed[j] < 1.)) { + first_order_entropy -= entropy_smoothed[j] * logl(entropy_smoothed[j]); + } + + if (j < nb_segment - 1) { + entropy_smoothed[j] += forward[i][j] * backward[i + 1][j + 1] * sequence_norm; +/* lbuff = forward[i][j] * backward[i + 1][j + 1] * sequence_norm; + entropy_smoothed[j] += lbuff; + if ((lbuff > 0.) && (lbuff < 1.)) { + first_order_entropy -= lbuff * logl(lbuff); + } */ + } + + if (entropy_smoothed[j] < 0.) { + entropy_smoothed[j] = 0.; + } + if (entropy_smoothed[j] > 1.) { + entropy_smoothed[j] = 1.; + } + + if (entropy_smoothed[j] > 0.) { + first_order_entropy += entropy_smoothed[j] * logl(entropy_smoothed[j]); + marginal_entropy -= entropy_smoothed[j] * logl(entropy_smoothed[j]); + } + } + +# ifdef MESSAGE + sum = 0.; + for (j = 0;j < nb_segment;j++) { + sum += entropy_smoothed[j]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << nb_segment << " " << i << " | " << sum << endl; + } +# endif + + } + + // computation of change-point entropy profile and change-point entropy + + change_point_entropy[nb_segment - 1][0] = 0.; + change_point_entropy_sum = 0.; + for (i = 1;i < seq_length;i++) { + if ((change_point[nb_segment - 1][i] > 0.) && (change_point[nb_segment - 1][i] < 1.)) { + + change_point_entropy[nb_segment - 1][i] = -change_point[nb_segment - 1][i] * logl(change_point[nb_segment - 1][i]) - + (1 - change_point[nb_segment - 1][i]) * logl(1 - change_point[nb_segment - 1][i]); + change_point_entropy_sum += change_point_entropy[nb_segment - 1][i]; + } + else { + change_point_entropy[nb_segment - 1][i] = 0.; + } + } + + // computation of change-point entropy profiles for the different numbers of segments + + for (i = 1;i < nb_segment - 1;i++) { + change_point_entropy[i][0] = 0.; + for (j = 1;j < seq_length;j++) { + if ((change_point[i][j] > 0.) && (change_point[i][j] < 1.)) { + change_point_entropy[i][j] = -change_point[i][j] * logl(change_point[i][j]) - + (1 - change_point[i][j]) * logl(1 - change_point[i][j]); + } + else { + change_point_entropy[i][j] = 0.; + } + } + } + + // supplementary forward recurrence for computing partial entropies + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length;j++) { + for (k = 0;k < nb_segment;k++) { + state_entropy[i][j][k] = 0.; + } + } + } + + for (i = 0;i < seq_length;i++) { + + // computation of segment log-likelihoods + + forward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + // recurrence + + if (contrast[i] != D_INF) { + normalized_contrast[i] = expl(contrast[i]); + } + else { + normalized_contrast[i] = 0.; + } + + segment_norm = 0.; + for (j = i - 1;j >= 0;j--) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + normalized_contrast[j] = expl(contrast[j] - 
segment_norm); + } + else { + normalized_contrast[j] = 0.; + } + } + + for (j = 0;j < nb_segment;j++) { + forward[i][j] = 0.; + } + norm[i] = 0.; + +// for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + for (j = 0;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j] = normalized_contrast[0]; + } + else { + for (k = i;k >= j;k--) { + forward[i][j] += normalized_contrast[k] * forward[k - 1][j - 1]; + } + } + + norm[i] += forward[i][j]; + } + + if (norm[i] > 0.) { +// for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + for (j = 0;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + forward[i][j] /= norm[i]; + } + + norm[i] = logl(norm[i]); + } + + // computation of partial entropies + + segment_norm = 0.; + for (j = i;j >= 0;j--) { +// segment_norm += entropy_norm[j]; + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + normalized_contrast[j] = expl(contrast[j] - segment_norm); + } + else { + normalized_contrast[j] = 0.; + } + } + + if (i < seq_length - 1) { + for (j = 1;j < nb_segment;j++) { + sequence_norm = expl(forward_norm[i] + backward_norm[i + 1] - likelihood[j]); + + for (k = MAX(nb_segment - 1 - j , nb_segment + i - seq_length);k <= MIN(nb_segment - 2 , i + nb_segment - 1 - j);k++) { + if (k == nb_segment - 1 - j) { + lsum = normalized_contrast[0] * backward[i + 1][k + 1] * sequence_norm; + for (m = 0;m <= i;m++) { + if (smoothed[j][m][0] > 0.) { + lbuff = lsum / smoothed[j][m][0]; + if (lbuff > 0.) { + state_entropy[j][m][0] += lbuff * (backward_predicted_entropy[i + 1][k + 1] - logl(lbuff)); + } + } + } + } + + else { + lsum = 0.; + for (m = k + j - nb_segment + 1;m <= i;m++) { + lsum += forward[m - 1][k + j - nb_segment] * normalized_contrast[m]; + if (smoothed[j][m][k + j - nb_segment + 1] > 0.) 
{ + lbuff = lsum * backward[i + 1][k + 1] * sequence_norm / + smoothed[j][m][k + j - nb_segment + 1]; + if (lbuff > 0.) { + state_entropy[j][m][k + j - nb_segment + 1] += lbuff * (backward_predicted_entropy[i + 1][k + 1] - logl(lbuff)); + } + } + } + } + } + } + } + } + +# ifdef DEBUG + for (i = 1;i < nb_segment;i++) { + cout << "\n" << i + 1 << " " << SEQ_label[SEQL_SEGMENTS] << endl; + for (j = 0;j < seq_length;j++) { + cout << j << " |"; + for (k = 0;k < nb_segment;k++) { + cout << " " << state_entropy[i][j][k]; + } + cout << endl; + } + } +# endif + + // extraction of partial entropies for the different numbers of segments + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length - 1;j++) { + backward_partial_entropy[i][j + 1] = 0.; + for (k = 0;k < nb_segment;k++) { + if (state_entropy[i][j][k] < 0.) { + state_entropy[i][j][k] = 0.; + } + if (smoothed[i][j][k] > 0.) { + backward_partial_entropy[i][j + 1] += smoothed[i][j][k] * (state_entropy[i][j][k] - + log(smoothed[i][j][k])); + } + } + if (backward_partial_entropy[i][j + 1] < 0.) 
{ + backward_partial_entropy[i][j + 1] = 0.; + } + } + } + +# ifdef MESSAGE + cout << "\n" << SEQ_label[SEQL_SEGMENTATION_ENTROPY] << endl; + for (i = 1;i < nb_segment - 1;i++) { + cout << i + 1 << " " << SEQ_label[SEQL_SEGMENTS] << ": " + << forward_predicted_entropy[seq_length - 1][i] << ", " + << backward_predicted_entropy[0][nb_segment - 1 - i] << ", " + << forward_partial_entropy[i][seq_length - 1] << ", " + << backward_partial_entropy[i][1] << endl; + } + cout << nb_segment << " " << SEQ_label[SEQL_SEGMENTS] << ": " + << forward_predicted_entropy[seq_length - 1][nb_segment - 1] << ", " + << backward_predicted_entropy[0][0] << ", " + << forward_partial_entropy[nb_segment - 1][seq_length - 1] << ", " + << backward_partial_entropy[nb_segment - 1][1] + << " | " << segmentation_entropy << endl; +# endif + +# ifdef DEBUG + for (i = 1;i < nb_segment;i++) { + cout << "\n"; + for (j = 0;j < seq_length;j++) { + cout << forward_partial_entropy[i][j] << " "; + } + if (i == nb_segment - 1) { + cout << " | " << segmentation_entropy; + } + cout << endl; + } + + for (i = 1;i < nb_segment;i++) { + cout << "\n"; + if (i == nb_segment - 1) { + cout << segmentation_entropy << " | "; + } + for (j = 1;j < seq_length;j++) { + cout << backward_partial_entropy[i][j] << " "; + } + cout << endl; + } +# endif + + for (i = 1;i < nb_segment;i++) { + for (j = seq_length - 1;j >= 1;j--) { + forward_partial_entropy[i][j] -= forward_partial_entropy[i][j - 1]; + if (forward_partial_entropy[i][j] < 0.) { + forward_partial_entropy[i][j] = 0.; + } + } + } + + for (i = 1;i < nb_segment;i++) { + backward_partial_entropy[i][0] = 0.; + for (j = 1;j < seq_length - 1;j++) { + backward_partial_entropy[i][j] -= backward_partial_entropy[i][j + 1]; + if (backward_partial_entropy[i][j] < 0.) 
{ + backward_partial_entropy[i][j] = 0.; + } + } + } + +# ifdef MESSAGE + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length;j++) { + if (forward_partial_entropy[i][j] > change_point_entropy[i][j]) { + cout << "\n" << SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY] << " ERROR: " + << forward_partial_entropy[i][j] << " " << change_point_entropy[i][j] + << " | " << i << ", " << j + 1 << endl; + } + + if (backward_partial_entropy[i][j] > change_point_entropy[i][j]) { + cout << "\n" << SEQ_label[SEQL_END_CONDITIONAL_ENTROPY] << " ERROR: " + << backward_partial_entropy[i][j] << " " << change_point_entropy[i][j] + << " | " << i << ", " << j + 1 << endl; + } + } + } +# endif + + if ((os) || (plot_set)) { + switch (format) { + + case ASCII : { + switch (output) { + case CHANGE_POINT : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\n\n"; + break; + case SEGMENT : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + break; + } + + profile_ascii_print(*os , index , nb_segment , backward_output , + (output == CHANGE_POINT ? 
SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) , + NULL , change_point , segment_length , prior_segment_length , + forward_partial_entropy , backward_partial_entropy , change_point_entropy); + + *os << "\n" << SEQ_label[SEQL_POSSIBLE_SEGMENTATION_LIKELIHOOD] << ": " << rlikelihood << endl; + *os << "\n" << SEQ_label[SEQL_SEGMENTATION_ENTROPY] << ": " << segmentation_entropy + << "\n" << SEQ_label[SEQL_FIRST_ORDER_ENTROPY] << ": " << first_order_entropy + << "\n" << SEQ_label[SEQL_CHANGE_POINT_ENTROPY] << ": " << change_point_entropy_sum +// << " (" << change_point_entropy_sum / nb_segment << ")"; + << "\n" << SEQ_label[SEQL_MARGINAL_ENTROPY] << ": " << marginal_entropy << endl; + + // extraction of change-point credibility intervals + + if (output == CHANGE_POINT) { + *os << "\n" << SEQ_label[SEQL_CHANGE_POINT_CREDIBILITY_INTERVALS] << endl; + for (i = 1;i < nb_segment;i++) { + *os << SEQ_label[SEQL_CHANGE_POINT] << " " << i << " ("; + + sum = 0.; + j = 0; + while (sum < CHANGE_POINT_CREDIBILITY_PROBABILITY / 2) { + j++; + sum += backward_output[j][i]; + } + *os << seq_index_parameter[j] << ", "; + +# ifdef MESSAGE + while (sum <= 1. - CHANGE_POINT_CREDIBILITY_PROBABILITY / 2) { + j++; + sum += backward_output[j][i]; + } + *os << seq_index_parameter[j] << " | "; +# endif + + sum = 0.; + j = seq_length; + while (sum < CHANGE_POINT_CREDIBILITY_PROBABILITY / 2) { + j--; + sum += backward_output[j][i]; + } + *os << seq_index_parameter[j] << ")" << endl; + } + } + + break; + } + + case SPREADSHEET : { + switch (output) { + case CHANGE_POINT : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\n\n"; + break; + case SEGMENT : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + break; + } + + profile_spreadsheet_print(*os , index , nb_segment , backward_output , + (output == CHANGE_POINT ? 
SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) , + common_contrast , NULL , change_point , segment_length , prior_segment_length , + forward_partial_entropy , backward_partial_entropy , change_point_entropy); + + *os << "\n" << SEQ_label[SEQL_POSSIBLE_SEGMENTATION_LIKELIHOOD] << "\t" << rlikelihood << endl; + *os << "\n" << SEQ_label[SEQL_SEGMENTATION_ENTROPY] << "\t" << segmentation_entropy + << "\n" << SEQ_label[SEQL_FIRST_ORDER_ENTROPY] << "\t" << first_order_entropy + << "\n" << SEQ_label[SEQL_CHANGE_POINT_ENTROPY] << "\t" << change_point_entropy_sum +// << "\t" << change_point_entropy_sum / nb_segment; + << "\n" << SEQ_label[SEQL_MARGINAL_ENTROPY] << "\t" << marginal_entropy << endl; + break; + } + + case GNUPLOT : { + profile_plot_print(*os , index , nb_segment , backward_output , + common_contrast , NULL , change_point , + segment_length , prior_segment_length , forward_partial_entropy , + backward_partial_entropy , change_point_entropy); + break; + } + + case PLOT : { + MultiPlotSet &plot = *plot_set; + + i = 1; + for (j = 1;j < nb_variable;j++) { + if ((model_type[j - 1] == POISSON_CHANGE) || + (model_type[j - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[j - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type[j - 1] == GAUSSIAN_CHANGE) || + (model_type[j - 1] == VARIANCE_CHANGE) || (model_type[j - 1] == BAYESIAN_POISSON_CHANGE) || + (model_type[j - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + i++; + } + } + + profile_plotable_write(plot[i] , index , nb_segment , backward_output); + i++; + change_point_profile_plotable_write(plot[i] , index , nb_segment , change_point); + i++; + segment_length_distribution_plotable_write(plot[i] , nb_segment , segment_length_max , + segment_length , prior_segment_length); + i++; + change_point_profile_plotable_write(plot[i] , index , nb_segment , forward_partial_entropy); + i++; + change_point_profile_plotable_write(plot[i] , index , nb_segment , backward_partial_entropy); + i++; + entropy_profile_plotable_write(plot[i] 
, index , forward_partial_entropy[nb_segment - 1] , + backward_partial_entropy[nb_segment - 1] , + change_point_entropy[nb_segment - 1]); + break; + } + } + } + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] factorial[i][j]; + } + delete [] factorial[i]; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] binomial_coeff[i][j]; + } + delete [] binomial_coeff[i]; + } + + delete [] seq_mean[i]; + delete [] hyperparam[i]; + } + delete [] factorial; + delete [] inf_bound_parameter; + delete [] binomial_coeff; + delete [] seq_mean; + delete [] hyperparam; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] contrast; + delete [] normalized_contrast; + + for (i = 0;i < seq_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + delete [] segment_predicted; + + for (i = 0;i < seq_length;i++) { + delete [] forward_predicted_entropy[i]; + } + delete [] forward_predicted_entropy; + + delete [] norm; + delete [] forward_norm; +// delete [] entropy_norm; + + delete [] likelihood; + + for (i = 0;i < seq_length;i++) { + delete [] backward[i]; + } + delete [] backward; + + for (i = 0;i < seq_length;i++) { + delete [] backward_predicted_entropy[i]; + } + delete [] backward_predicted_entropy; + + delete [] backward_norm; + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length;j++) { + delete [] smoothed[i][j]; + } + delete [] smoothed[i]; + } + delete [] smoothed; + + for (i = 0;i < seq_length;i++) { + delete [] backward_output[i]; + } + delete [] backward_output; + + for (i = 0;i < nb_segment;i++) { + delete segment_length[i]; + } + delete [] segment_length; + + delete prior_segment_length; + + for (i = 1;i < nb_segment;i++) { + delete [] change_point[i]; + } + delete [] 
change_point; + + delete [] entropy_smoothed; + + for (i = 1;i < nb_segment;i++) { + for (j = 0;j < seq_length;j++) { + delete [] state_entropy[i][j]; + } + delete [] state_entropy[i]; + } + delete [] state_entropy; + + for (i = 1;i < nb_segment;i++) { + delete [] forward_partial_entropy[i]; + } + delete [] forward_partial_entropy; + + for (i = 1;i < nb_segment;i++) { + delete [] backward_partial_entropy[i]; + } + delete [] backward_partial_entropy; + + for (i = 1;i < nb_segment;i++) { + delete [] change_point_entropy[i]; + } + delete [] change_point_entropy; + + return rlikelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation of segmentations of a single sequence or a sample of sequences. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] rank ranks (for ordinal variables), + * \param[in] os stream, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] nb_segmentation number of segmentations. + * + * \return log-likelihood of the multiple change-point model. 
+ */ +/*--------------------------------------------------------------*/ + +double Sequences::forward_backward_sampling(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + double **rank , ostream &os , output_format format , + int nb_segmentation) const + +{ + int i , j , k , m; + int seq_length , segment_length , *inf_bound_parameter , *seq_index_parameter , *change_point , + *psegment; + double sum , likelihood , segmentation_likelihood , *backward , *cumul_backward , **seq_mean , + **hyperparam , ***factorial , ***binomial_coeff , ***mean , ***variance , + ***intercept , ***slope , ***autoregressive_coeff; + long double segment_norm , sequence_norm , *contrast , *norm , **forward; + + + factorial = new double**[nb_variable]; + inf_bound_parameter = new int[nb_variable]; + binomial_coeff = new double**[nb_variable]; + seq_mean = new double*[nb_variable]; + hyperparam = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + + // computation of log of factorials for Poisson models + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + factorial[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + factorial[i][j][k] = log_factorial(int_sequence[j][i][k]); + } + } + else { + factorial[i][j] = NULL; + } + } + } + + else { + factorial[i] = NULL; + } + + // computation of log of binomial coefficients for negative binomial models + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + + binomial_coeff[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if 
((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + binomial_coeff[i][j][k] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + } + else { + binomial_coeff[i][j] = NULL; + } + } + } + + else { + binomial_coeff[i] = NULL; + } + + // computation of sequence means for Gaussian change in the variance models or + // stationary piecewise autoregressive models + + if ((model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + // computation of hyperparameters for Bayesian Poisson and Gaussian models + + if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + hyperparam[i] = new double[2]; + gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + hyperparam[i] = new double[4]; + 
gaussian_gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else { + hyperparam[i] = NULL; + } + } + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + + contrast = new long double[seq_length]; + + forward = new long double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward[i] = new long double[nb_segment]; + } + + norm = new long double[seq_length]; + + backward = new double[seq_length]; + cumul_backward = new double[seq_length]; + + change_point = new int[nb_segment + 1]; + + mean = new double**[nb_variable]; + variance = new double**[nb_variable]; + intercept = new double**[nb_variable]; + slope = new double**[nb_variable]; + autoregressive_coeff = new double**[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || + (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + mean[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + mean[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + else { + mean[i][j] = NULL; + variance[i][j] = NULL; + } + } + } + + else { + mean[i] = new double*[1]; + mean[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + } + } + + else if (model_type[i - 1] == LINEAR_MODEL_CHANGE) { + if ((index != I_DEFAULT) || (!common_contrast)) { + 
intercept[i] = new double*[nb_sequence]; + slope[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + intercept[i][j] = new double[nb_segment]; + slope[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + else { + intercept[i][j] = NULL; + slope[i][j] = NULL; + variance[i][j] = NULL; + } + } + } + + else { + intercept[i] = new double*[1]; + intercept[i][0] = new double[nb_segment]; + slope[i] = new double*[1]; + slope[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + } + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + mean[i] = new double*[nb_sequence]; + autoregressive_coeff[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + mean[i][j] = new double[nb_segment]; + autoregressive_coeff[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + else { + mean[i][j] = NULL; + autoregressive_coeff[i][j] = NULL; + variance[i][j] = NULL; + } + } + } + + else { + mean[i] = new double*[1]; + mean[i][0] = new double[nb_segment]; + autoregressive_coeff[i] = new double*[1]; + autoregressive_coeff[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + } + } + } + +# ifdef DEBUG + double **segment_probability; + + segment_probability = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + segment_probability[i] = new double[nb_segment]; + for (j = 0;j < nb_segment;j++) { + segment_probability[i][j] = 0.; + } + } +# endif + + // forward recurrence + + for (i = 0;i < seq_length;i++) { + + // computation of segment log-likelihoods + + forward_contrast(i , index , 
model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + if (contrast[i] != D_INF) { + contrast[i] = expl(contrast[i]); + } + else { + contrast[i] = 0.; + } + + segment_norm = 0.; + for (j = i - 1;j >= 0;j--) { + segment_norm += norm[j]; + if (contrast[j] != D_INF) { + contrast[j] = expl(contrast[j] - segment_norm); + } + else { + contrast[j] = 0.; + } + } + + for (j = 0;j < nb_segment;j++) { + forward[i][j] = 0.; + } + norm[i] = 0.; + + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j] = contrast[0]; + } + else { + for (k = i;k >= j;k--) { + forward[i][j] += contrast[k] * forward[k - 1][j - 1]; + } + } + + norm[i] += forward[i][j]; + } + + if (norm[i] > 0.) { + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + forward[i][j] /= norm[i]; + } + + norm[i] = logl(norm[i]); + } + +# ifdef DEBUG + cout << i << " |"; + for (j = 0;j < nb_segment;j++) { + cout << " " << forward[i][j]; + } + cout << " | " << expl(norm[i]) << endl; +# endif + + } + + sequence_norm = segment_norm + norm[seq_length - 1]; + + if (forward[seq_length - 1][nb_segment - 1] > 0.) { + likelihood = logl(forward[seq_length - 1][nb_segment - 1]) + sequence_norm; + } + else { + likelihood = D_INF; + } + + if (likelihood != D_INF) { + + // backward pass + +# ifdef MESSAGE + cout << "\n"; +# endif + + for (i = 0;i < nb_segmentation;i++) { + j = seq_length - 1; + change_point[nb_segment] = seq_length; + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0] + j; + segmentation_likelihood = sequence_norm; + + for (k = nb_segment - 1;k >= 0;k--) { + + // computation of segment log-likelihoods + + forward_contrast(j , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast , k); + + segment_norm = 0.; + for (m = j;m >= k;m--) { + segment_norm += norm[m]; + if (contrast[m] != D_INF) { + contrast[m] = expl(contrast[m] - segment_norm); + } + else { + contrast[m] = 0.; + } + } + + if (k > 0) { + for (m = j;m >= k;m--) { + backward[m] = contrast[m] * forward[m - 1][k - 1] / forward[j][k]; + } + stat_tool::cumul_computation(j - k , backward + k , cumul_backward); + segment_length = j - (k + cumul_method(j - k , cumul_backward)) + 1; + +# ifdef MESSAGE + sum = 0.; + for (m = j;m >= k;m--) { + sum += backward[m]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. + DOUBLE_ERROR)) { + cout << "\nERROR: " << j << " " << sum << endl; + } +# endif + + } + + else { + segment_length = j + 1; + } + + segmentation_likelihood += logl(contrast[j - segment_length + 1]); + + for (m = j;m > j - segment_length;m--) { + *psegment-- = k; + } + j -= segment_length; + change_point[k] = j + 1; + } + + for (j = 1;j < nb_variable;j++) { + piecewise_linear_function(index , j , nb_segment , model_type[j - 1] , common_contrast , + change_point , seq_index_parameter , NULL , mean[j] , variance[j] , + NULL , intercept[j] , slope[j] , autoregressive_coeff[j]); + } + +# ifdef DEBUG + + // approximation of smoothed probabilities + + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0]; + for (j = 0;j < seq_length;j++) { + segment_probability[j][*psegment++]++; + } +# endif + +# ifdef MESSAGE + if (i == 0) { + os << "\n"; + } + + switch (format) { + + case ASCII : { + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0]; + for (j = 0;j < seq_length;j++) { + os << *psegment++ << " "; + } + + os << " " << i + 1 << " " << segmentation_likelihood << " (" + << exp(segmentation_likelihood - likelihood) << ")" << endl; + + os << (nb_segment == 2 ? SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_CHANGE_POINTS]) << ": "; + + for (j = 1;j < nb_segment;j++) { + os << seq_index_parameter[change_point[j]]; + if (j < nb_segment - 1) { + os << ", "; + } + } + os << endl; + + for (j = 1;j < nb_variable;j++) { + piecewise_linear_function_ascii_print(os , index , j , nb_segment , model_type[j - 1] , + common_contrast , change_point , seq_index_parameter , + mean[j] , variance[j] , intercept[j] , slope[j] , + autoregressive_coeff[j]); + } + break; + } + + case SPREADSHEET : { + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0]; + for (j = 0;j < seq_length;j++) { + os << *psegment++ << "\t"; + } + + os << "\t" << i + 1 << "\t" << segmentation_likelihood << "\t" + << exp(segmentation_likelihood - likelihood) << endl; + + for (j = 1;j < nb_variable;j++) { + piecewise_linear_function_spreadsheet_print(os , index , j , nb_segment , model_type[j - 1] , + common_contrast , change_point , seq_index_parameter , + mean[j] , variance[j] , intercept[j] , slope[j] , + autoregressive_coeff[j]); + } + break; + } + } +# endif + + } + +# ifdef DEBUG + if (nb_segmentation >= 1000) { + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + segment_probability[i][j] /= nb_segmentation; + } + } + + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0]; + for (i = 0;i < seq_length;i++) { + *psegment++ = I_DEFAULT; + } + + os << "\n" << SEQ_label[SEQL_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + + profile_ascii_print(os , index , nb_segment , segment_probability , + SEQ_label[SEQL_SEGMENT]); + } +# endif + + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] factorial[i][j]; + } + delete [] factorial[i]; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] binomial_coeff[i][j]; + } + delete [] binomial_coeff[i]; + } + + delete [] seq_mean[i]; + delete [] hyperparam[i]; + } + delete [] factorial; + delete [] inf_bound_parameter; + delete [] binomial_coeff; + delete [] seq_mean; + delete [] hyperparam; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] contrast; + + for (i = 0;i < seq_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + delete [] norm; + + delete [] backward; + delete [] cumul_backward; + + delete [] change_point; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || + (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + delete [] mean[i][j]; + delete [] variance[i][j]; + } + } + } + + else { + delete [] mean[i][0]; + delete [] variance[i][0]; + } + + delete [] mean[i]; + delete [] variance[i]; + } + + else if (model_type[i - 1] == LINEAR_MODEL_CHANGE) { + if ((index != I_DEFAULT) || 
(!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + delete [] intercept[i][j]; + delete [] slope[i][j]; + delete [] variance[i][j]; + } + } + } + + else { + delete [] intercept[i][0]; + delete [] slope[i][0]; + delete [] variance[i][0]; + } + + delete [] intercept[i]; + delete [] slope[i]; + delete [] variance[i]; + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + delete [] mean[i][j]; + delete [] autoregressive_coeff[i][j]; + delete [] variance[i][j]; + } + } + } + + else { + delete [] mean[i][0]; + delete [] autoregressive_coeff[i][0]; + delete [] variance[i][0]; + } + + delete [] mean[i]; + delete [] autoregressive_coeff[i]; + delete [] variance[i]; + } + } + + delete [] mean; + delete [] variance; + delete [] intercept; + delete [] slope; + delete [] autoregressive_coeff; + +# ifdef DEBUG + for (i = 0;i < seq_length;i++) { + delete [] segment_probability[i]; + } + delete [] segment_probability; +# endif + + return likelihood; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/change_points4.cpp b/src/cpp/sequence_analysis/change_points4.cpp new file mode 100644 index 0000000..9604f7f --- /dev/null +++ b/src/cpp/sequence_analysis/change_points4.cpp @@ -0,0 +1,3302 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: change_points4.cpp 11914 2012-03-26 06:29:13Z guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU 
General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + +extern double log_factorial(int value); +extern double log_binomial_coefficient(int inf_bound , double parameter , int value); + + +#if defined (SYSTEM_IS__CYGWIN) +#define expl exp +#endif + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the N most probable segmentations of a single sequence or + * a sample of sequences. 
+ * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] irank ranks (for ordinal variables), + * \param[in] os stream, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] inb_segmentation number of segmentations, + * \param[in] likelihood log-likelihood of the multiple change-point model. + * + * \return log-likelihood of the optimal segmentation. + */ +/*--------------------------------------------------------------*/ + +double Sequences::N_segmentation(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + double **irank , ostream &os , output_format format , + int inb_segmentation , double likelihood) const + +{ + bool **active_cell; + int i , j , k , m; + int seq_length , brank , previous_rank , count , nb_cell , *inf_bound_parameter , + *seq_index_parameter , *rank , *change_point , *psegment , ***optimal_length , ***optimal_rank; + double buff , segmentation_likelihood , *nb_segmentation , **hyperparam , **seq_mean , + **nb_segmentation_forward , ***factorial , ***binomial_coeff , ***forward , ***mean , + ***variance , ***intercept , ***slope , ***autoregressive_coeff; + long double *contrast , likelihood_cumul , posterior_probability_cumul; + +# ifdef MESSAGE + int *first_change_point; + long double norm; +# endif + + + // initializations + + factorial = new double**[nb_variable]; + inf_bound_parameter = new int[nb_variable]; + binomial_coeff = new double**[nb_variable]; + seq_mean = new double*[nb_variable]; + hyperparam = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + + // computation of log of factorials for Poisson models + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + 
factorial[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + factorial[i][j][k] = log_factorial(int_sequence[j][i][k]); + } + } + else { + factorial[i][j] = NULL; + } + } + } + + else { + factorial[i] = NULL; + } + + // computation of log of binomial coefficients for negative binomial models + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + + binomial_coeff[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + binomial_coeff[i][j][k] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + } + else { + binomial_coeff[i][j] = NULL; + } + } + } + + else { + binomial_coeff[i] = NULL; + } + + // computation of sequence means for Gaussian change in the variance models or + // stationary piecewise autoregressive models + + if ((model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + 
seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + // computation of hyperparameters for Bayesian Poisson and Gaussian models + + if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + hyperparam[i] = new double[2]; + gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + hyperparam[i] = new double[4]; + gaussian_gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else { + hyperparam[i] = NULL; + } + } + + seq_length = length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 
0 : index]; + } + + contrast = new long double[seq_length]; + + nb_segmentation_forward = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + nb_segmentation_forward[i] = new double[nb_segment]; + } + + forward = new double**[seq_length]; + for (i = 0;i < seq_length;i++) { + forward[i] = new double*[nb_segment]; + for (j = 0;j < nb_segment;j++) { + forward[i][j] = new double[inb_segmentation]; + } + } + + nb_segmentation = new double[nb_segment]; + rank = new int[seq_length + 1]; + + optimal_length = new int**[seq_length]; + for (i = 0;i < seq_length;i++) { + optimal_length[i] = new int*[nb_segment]; + for (j = 0;j < nb_segment;j++) { + optimal_length[i][j] = new int[inb_segmentation]; + } + } + + optimal_rank = new int**[seq_length]; + for (i = 0;i < seq_length;i++) { + optimal_rank[i] = new int*[nb_segment]; + for (j = 0;j < nb_segment;j++) { + optimal_rank[i][j] = new int[inb_segmentation]; + } + } + + change_point = new int[nb_segment + 1]; + + mean = new double**[nb_variable]; + variance = new double**[nb_variable]; + intercept = new double**[nb_variable]; + slope = new double**[nb_variable]; + autoregressive_coeff = new double**[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + mean[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + mean[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + else { + mean[i][j] = NULL; + variance[i][j] = NULL; + } + } + } + + else { + mean[i] = new 
double*[1]; + mean[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + } + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + intercept[i] = new double*[nb_sequence]; + slope[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + intercept[i][j] = new double[nb_segment]; + slope[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + else { + intercept[i][j] = NULL; + slope[i][j] = NULL; + variance[i][j] = NULL; + } + } + } + + else { + intercept[i] = new double*[1]; + intercept[i][0] = new double[nb_segment]; + slope[i] = new double*[1]; + slope[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + } + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + mean[i] = new double*[nb_sequence]; + autoregressive_coeff[i] = new double*[nb_sequence]; + variance[i] = new double*[nb_sequence]; + + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + mean[i][j] = new double[nb_segment]; + autoregressive_coeff[i][j] = new double[nb_segment]; + variance[i][j] = new double[nb_segment]; + } + else { + mean[i][j] = NULL; + autoregressive_coeff[i][j] = NULL; + variance[i][j] = NULL; + } + } + } + + else { + mean[i] = new double*[1]; + mean[i][0] = new double[nb_segment]; + autoregressive_coeff[i] = new double*[1]; + autoregressive_coeff[i][0] = new double[nb_segment]; + variance[i] = new double*[1]; + variance[i][0] = new double[nb_segment]; + } + } + } + + active_cell = new bool*[seq_length]; + for (i = 0;i < seq_length;i++) { + active_cell[i] = new bool[nb_segment]; + for (j 
= 0;j < nb_segment;j++) { + active_cell[i][j] = false; + } + } + +# ifdef DEBUG + double **segment_probability; + + segment_probability = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + segment_probability[i] = new double[nb_segment]; + for (j = 0;j < nb_segment;j++) { + segment_probability[i][j] = D_INF; + } + } +# endif + +# ifdef MESSAGE + double **smoothed_probability; + long double approximated_likelihood = 0.; + + if (inb_segmentation >= 1000) { + smoothed_probability = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + smoothed_probability[i] = new double[nb_segment]; + for (j = 0;j < nb_segment;j++) { + smoothed_probability[i][j] = 0.; + } + } + } +# endif + +# ifdef DEBUG + for (i = 0;i < nb_segment;i++) { + nb_segmentation[i] = 1; + } +# endif + + // forward recurrence + + for (i = 0;i < seq_length;i++) { + + // computation of segment contrast functions (log-likelihoods or sum of squared deviations) + + forward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , irank , contrast); + +# ifdef DEBUG + for (j = i - 1;j >= 0;j--) { + cout << contrast[j] << " "; + } + cout << endl; +# endif + + // computation of the number of segmentations + + for (j = 0;j < nb_segment;j++) { + nb_segmentation_forward[i][j] = 0; + } + + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + if (contrast[0] != D_INF) { + nb_segmentation_forward[i][j]++; + } + } + + else { + for (k = i;k >= j;k--) { + if (contrast[k] != D_INF) { + nb_segmentation_forward[i][j] += nb_segmentation_forward[k - 1][j - 1]; + } + } + } + } + +# ifdef DEBUG + nb_segmentation[0] = 1; + for (j = 1;j < MIN((i < seq_length - 1 ? 
nb_segment - 1 : nb_segment) , i + 1);j++) { + nb_segmentation[j] = nb_segmentation[j - 1] * (i - j + 1) / j; + } + + if (i < inb_segmentation) { + cout << i << ": "; + for (j = 1;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + cout << nb_segmentation_forward[i][j] << " " << nb_segmentation[j] << " | "; + } + cout << endl; + } +# endif + + for (j = 0;j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + nb_segmentation[j] = nb_segmentation_forward[i][j]; + if (nb_segmentation[j] > inb_segmentation) { + nb_segmentation[j] = inb_segmentation; + } + } + + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j][0] = contrast[0]; + if (forward[i][j][0] != D_INF) { + optimal_length[i][j][0] = i + 1; + } + } + + else { +/* if (j < nb_segment - 1) { + if ((i > j) && (nb_segmentation[j] < inb_segmentation)) { + nb_segmentation[j] = nb_segmentation[j] * i / (i - j); + if (nb_segmentation[j] > inb_segmentation) { + nb_segmentation[j] = inb_segmentation; + } + } + } + + else { + nb_segmentation[j] = nb_segmentation[j - 1] * (i - j + 1) / j; + if (nb_segmentation[j] > inb_segmentation) { + nb_segmentation[j] = inb_segmentation; + } + } */ + +# ifdef DEBUG + cout << "TEST: " << i << " " << j << ": " << nb_segmentation[j] << endl; +# endif + + for (k = i;k >= j;k--) { + rank[k] = 0; + } + + for (k = 0;k < nb_segmentation[j];k++) { + forward[i][j][k] = D_INF; + for (m = i;m >= j;m--) { + if ((contrast[m] != D_INF) && (forward[m - 1][j - 1][rank[m]] != D_INF)) { + buff = contrast[m] + forward[m - 1][j - 1][rank[m]]; + if (buff > forward[i][j][k]) { + forward[i][j][k] = buff; + optimal_length[i][j][k] = i - m + 1; + optimal_rank[i][j][k] = rank[m]; + } + } + } + + if (forward[i][j][k] != D_INF) { + rank[i - optimal_length[i][j][k] + 1]++; + } + +# ifdef DEBUG + else { + cout << "\nuseful test" << endl; + } +# endif + + } + } + + for (k 
= (int)nb_segmentation[j];k < inb_segmentation;k++) { + forward[i][j][k] = D_INF; + } + } + +# ifdef DEBUG + cout << i << " : "; + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + cout << j << " :"; + for (k = 0;k < nb_segmentation[j];k++) { + cout << " " << forward[i][j][k]; + if (forward[i][j][k] != D_INF) { + cout << " " << optimal_length[i][j][k]; + } + cout << " |"; + } + cout << "| "; + } + cout << endl; +# endif + + } + +# ifdef MESSAGE + streamsize nb_digits; + + nb_digits = os.precision(10); + + os << "\n" << SEQ_label[SEQL_NB_SEGMENTATION] << ": " + << nb_segmentation_forward[seq_length - 1][nb_segment - 1] + << " (" << nb_segmentation_computation(index , nb_segment , model_type , common_contrast) << ")" << endl; + + os.precision(nb_digits); + + if (((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) && + (format == SPREADSHEET) && (nb_segment == 2) && (inb_segmentation >= seq_length - 1)) { + first_change_point = new int[seq_length]; + } +# endif + + // restoration + + likelihood_cumul = 0.; + posterior_probability_cumul = 0.; + + for (i = 0;i < nb_segmentation[nb_segment - 1];i++) { + if (forward[seq_length - 1][nb_segment - 1][i] == D_INF) { + break; + } + +# ifdef DEBUG + cout << "\n"; +# endif + + j = seq_length - 1; + change_point[nb_segment] = seq_length; + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0] + j; + brank = i; + + for (k = nb_segment - 1;k >= 0;k--) { + for (m = j;m > j - optimal_length[j][k][brank];m--) { + active_cell[m][k] = true; + *psegment-- = k; + } + +# ifdef DEBUG + cout << k << " " << optimal_length[j][k][brank] << " " << brank << " | "; +# endif + + if (k > 0) { + previous_rank = optimal_rank[j][k][brank]; + } + j -= optimal_length[j][k][brank]; + change_point[k] = j + 1; + if (k > 0) { + brank = previous_rank; + } + } + +# ifdef DEBUG + cout << endl; +# endif + + for (j = 1;j < nb_variable;j++) { + piecewise_linear_function(index , j , nb_segment , model_type[j - 1] , common_contrast , + change_point , seq_index_parameter , NULL , mean[j] , variance[j] , + NULL , intercept[j] , slope[j] , autoregressive_coeff[j]); + } + + if ((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if (forward[seq_length - 1][nb_segment - 1][i] < 0.) { + count = (index == I_DEFAULT ? nb_sequence : 1); + + forward[seq_length - 1][nb_segment - 1][i] = -((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][nb_segment - 1][i] / + (count * seq_length)) + log(2 * M_PI) + 1); +/* forward[seq_length - 1][nb_segment - 1][i] = -((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][nb_segment - 1][i] / + (count * (seq_length - nb_segment))) + log(2 * M_PI)) - + (double)(count * (seq_length - nb_segment)) / 2.; */ + } + else { + forward[seq_length - 1][nb_segment - 1][i] = D_INF; + } + } + + if (i == 0) { + segmentation_likelihood = forward[seq_length - 1][nb_segment - 1][i]; + } + + if (forward[seq_length - 1][nb_segment - 1][i] != D_INF) { + likelihood_cumul += exp(forward[seq_length - 1][nb_segment - 1][i]); + if (likelihood != D_INF) { + posterior_probability_cumul += exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood); + } + } + +# ifdef DEBUG + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0]; + for (j = 0;j < seq_length;j++) { +// if (((i == 0) || (*psegment != *(psegment - 1))) && +// (forward[seq_length - 1][nb_segment - 1][i] > segment_probability[j][*psegment])) { + if (forward[seq_length - 1][nb_segment - 1][i] > segment_probability[j][*psegment]) { + segment_probability[j][*psegment] = forward[seq_length - 1][nb_segment - 1][i]; + } + psegment++; + } +# endif + +# ifdef MESSAGE + if (inb_segmentation >= 1000) { + + // approximation of smoothed probabilities + + buff = exp(forward[seq_length - 1][nb_segment - 1][i]); + approximated_likelihood += buff; + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0]; + for (j = 0;j < seq_length;j++) { + smoothed_probability[j][*psegment++] += buff; + } + } +# endif + + nb_cell = 0; + for (j = 0;j < seq_length;j++) { + for (k = 0;k < nb_segment;k++) { + if (active_cell[j][k]) { + nb_cell++; + } + } + } + +# ifdef MESSAGE + if (i == 0) { + os << "\n"; + } + + switch (format) { + + case ASCII : { + if (inb_segmentation <= 200) { + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0]; + for (j = 0;j < seq_length;j++) { + os << *psegment++ << " "; + } + + os << " " << i + 1 << " " << forward[seq_length - 1][nb_segment - 1][i] << " ("; + if (likelihood != D_INF) { + os << exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood) << " "; + if (boost::math::isnan(likelihood_cumul)) { + os << likelihood_cumul / exp(likelihood); + } + else { + os << posterior_probability_cumul; + } + } + else { + os << exp(forward[seq_length - 1][nb_segment - 1][i] - segmentation_likelihood); + } + os << " " << nb_cell << ")" << endl; + + os << (nb_segment == 2 ? SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_CHANGE_POINTS]) << ": "; + + for (j = 1;j < nb_segment;j++) { + os << seq_index_parameter[change_point[j]]; + if (j < nb_segment - 1) { + os << ", "; + } + } + os << endl; + +/* psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0] + 1; + for (j = 1;j < seq_length;j++) { + if (*psegment != *(psegment - 1)) { + os << seq_index_parameter[j] << ", "; + } + psegment++; + } + os << endl; */ + + for (j = 1;j < nb_variable;j++) { + piecewise_linear_function_ascii_print(os , index , j , nb_segment , model_type[j - 1] , + common_contrast , change_point , seq_index_parameter , + mean[j] , variance[j] , intercept[j] , slope[j] , + autoregressive_coeff[j]); + } + } + break; + } + + case SPREADSHEET : { + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0]; + for (j = 0;j < seq_length;j++) { + os << *psegment++ << "\t"; + } + + os << "\t" << i + 1 << "\t" << forward[seq_length - 1][nb_segment - 1][i] << "\t"; + if (likelihood != D_INF) { + os << exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood) << "\t"; + if (boost::math::isnan(likelihood_cumul)) { + os << likelihood_cumul / exp(likelihood); + } + else { + os << posterior_probability_cumul; + } + } + else { + os << exp(forward[seq_length - 1][nb_segment - 1][i] - segmentation_likelihood); + } + os << "\t" << nb_cell << endl; + + for (j = 1;j < nb_variable;j++) { + piecewise_linear_function_spreadsheet_print(os , index , j , nb_segment , model_type[j - 1] , + common_contrast , change_point , seq_index_parameter , + mean[j] , variance[j] , intercept[j] , slope[j] , + autoregressive_coeff[j]); + } + + if (((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) && + (nb_segment == 2) && (inb_segmentation >= seq_length - 1)) { + first_change_point[i] = change_point[1]; + } + break; + } + } + +# endif + + } + +# ifdef MESSAGE + if (((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) && + (format == SPREADSHEET) && (nb_segment == 2) && (inb_segmentation >= seq_length - 1)) { + norm = 0.; + for (i = 0;i < seq_length - 1;i++) { + norm += exp(forward[seq_length - 1][nb_segment - 1][i]); + } + + os << "\n" << SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\n\n"; + + for (i = 1;i 
< seq_length;i++) { + for (j = 0;j < seq_length - 1;j++) { + if (first_change_point[j] == i) { + os << seq_index_parameter[i] << "\t" + << exp(forward[seq_length - 1][nb_segment - 1][j]) / norm << endl; + break; + } + } + } + + delete [] first_change_point; + } + + if (((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) && + (nb_segment > 2) && (inb_segmentation >= nb_segmentation_forward[seq_length - 1][nb_segment - 1])) { + norm = 0.; + for (i = 0;i < nb_segmentation_forward[seq_length - 1][nb_segment - 1];i++) { + norm += exp(forward[seq_length - 1][nb_segment - 1][i]); + } + + os << SEQ_label[SEQL_POSTERIOR_PROBABILITY] << ": " << exp(forward[seq_length - 1][nb_segment - 1][0]) / norm << endl; + } +# endif + +# ifdef DEBUG + if (((likelihood != D_INF) && (likelihood_cumul / exp(likelihood) > 0.8)) || + (segmentation_likelihood != D_INF)) { + if (likelihood != D_INF) { + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + if (segment_probability[i][j] != D_INF) { + segment_probability[i][j] = exp(segment_probability[i][j] - likelihood); + } + else { + segment_probability[i][j] = 0.; + } + } + } + + os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + } + + else { + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + if (segment_probability[i][j] != D_INF) { + segment_probability[i][j] = exp(segment_probability[i][j] - segmentation_likelihood); + } + else { + segment_probability[i][j] = 0.; + } + } + } + + os << "\n" << SEQ_label[SEQL_MAX_SEGMENT_LIKELIHOOD] << "\n\n"; + } + + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0]; + for (j = 0;j < seq_length;j++) { + *psegment++ = I_DEFAULT; + } + + profile_ascii_print(os , index , nb_segment , segment_probability , + SEQ_label[SEQL_SEGMENT]); + } +# endif + +# ifdef MESSAGE + if (inb_segmentation >= 1000) { + double previous_cumul[3] , + cdf[10] = {0.5 , 0.75 , 0.9 , 0.95 , 0.975 , 0.99 , 0.995, 0.9975 , 0.999 , 1}; + long double divergence; + Distribution *segmentation; + + + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + smoothed_probability[i][j] /= approximated_likelihood; + } + } + + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0]; + for (i = 0;i < seq_length;i++) { + *psegment++ = I_DEFAULT; + } + + os << "\n" << SEQ_label[SEQL_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + + profile_ascii_print(os , index , nb_segment , smoothed_probability , SEQ_label[SEQL_SEGMENT]); + + // approximation of the Kullback-Leibler divergence of the uniform distribution from the + // segmentation distribution + + segmentation = new Distribution(inb_segmentation); + likelihood_cumul = 0.; + divergence = 0.; + + if (likelihood != D_INF) { + for (i = 0;i < inb_segmentation;i++) { + segmentation->mass[i] = exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood); + likelihood_cumul += exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood); + segmentation->cumul[i] = likelihood_cumul; + + divergence += exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood) * + (forward[seq_length - 1][nb_segment - 1][i] - likelihood + + log(nb_segmentation_forward[seq_length - 1][nb_segment - 1])); + } + + segmentation->complement = 1. 
- likelihood_cumul; + } + + else { + for (i = 0;i < inb_segmentation;i++) { + segmentation->mass[i] = exp(forward[seq_length - 1][nb_segment - 1][i]) / approximated_likelihood; + + divergence += exp(forward[seq_length - 1][nb_segment - 1][i]) / approximated_likelihood * + (forward[seq_length - 1][nb_segment - 1][i] - log(approximated_likelihood) + + log(nb_segmentation_forward[seq_length - 1][nb_segment - 1])); + } + + segmentation->cumul_computation(); + } + + segmentation->max_computation(); + segmentation->mean_computation(); + segmentation->variance_computation(); + + os << "\n" << SEQ_label[SEQL_SEGMENTATION_DIVERGENCE] << ": " << divergence << endl; + + os << "\n"; + segmentation->ascii_characteristic_print(os , true); + if (likelihood != D_INF) { + os << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_POSTERIOR_PROBABILITY] << ": " + << likelihood_cumul << " (" << inb_segmentation << " " + << SEQ_label[SEQL_SEGMENTATIONS]<< ")" << endl; + } + + delete segmentation; + + os << "\n"; + likelihood_cumul = 0.; + i = 0; + if (likelihood != D_INF) { + for (j = 0;j < inb_segmentation;j++) { + previous_cumul[0] = likelihood_cumul; + likelihood_cumul += exp(forward[seq_length - 1][nb_segment - 1][j]); + if (likelihood_cumul / exp(likelihood) > cdf[i]) { + os << j << " " << previous_cumul[0] / exp(likelihood) << " " + << likelihood_cumul / exp(likelihood) << " ("; +// os << j + 1 << " " << likelihood_cumul / exp(likelihood) << " ("; + if (i == 0) { + os << (j + 1) / nb_segmentation_forward[seq_length - 1][nb_segment - 1] << ")" << endl; + } + else { + os << likelihood_cumul / exp(likelihood) - previous_cumul[1] << " " + << (j + 1) / nb_segmentation_forward[seq_length - 1][nb_segment - 1] - previous_cumul[2] << ")" << endl; + } + + if (cdf[i] == 1) { + break; + } + previous_cumul[1] = likelihood_cumul / exp(likelihood); + previous_cumul[2] = (j + 1) / nb_segmentation_forward[seq_length - 1][nb_segment - 1]; + i++; + } + } + } + + else { + for (j = 0;j < 
inb_segmentation;j++) { + previous_cumul[0] = likelihood_cumul; + likelihood_cumul += exp(forward[seq_length - 1][nb_segment - 1][j]); + if (likelihood_cumul / approximated_likelihood > cdf[i]) { + os << j << " " << previous_cumul[0] / approximated_likelihood << " " + << likelihood_cumul / approximated_likelihood << " ("; +// os << j + 1 << " " << likelihood_cumul / approximated_likelihood << " ("; + if (i == 0) { + os << (j + 1) / nb_segmentation_forward[seq_length - 1][nb_segment - 1] << ")" << endl; + } + else { + os << likelihood_cumul / approximated_likelihood - previous_cumul[1] << " " + << (j + 1) / nb_segmentation_forward[seq_length - 1][nb_segment - 1] - previous_cumul[2] << ")" << endl; + } + + if (cdf[i] == 1) { + break; + } + previous_cumul[1] = likelihood_cumul / approximated_likelihood; + previous_cumul[2] = (j + 1) / nb_segmentation_forward[seq_length - 1][nb_segment - 1]; + i++; + } + } + } + +/* ofstream out_file("Spreadsheet/segmentation_probability.xld"); + + likelihood_cumul = 0.; + if (likelihood != D_INF) { + for (i = 0;i < inb_segmentation;i++) { + likelihood_cumul += exp(forward[seq_length - 1][nb_segment - 1][i]); + out_file << i + 1 << "\t" << exp(forward[seq_length - 1][nb_segment - 1][i] - likelihood) << "\t" + << likelihood_cumul / exp(likelihood) << "\t" + << 1. / nb_segmentation_forward[seq_length - 1][nb_segment - 1] << endl; + } + } + + else { + for (i = 0;i < inb_segmentation;i++) { + likelihood_cumul += exp(forward[seq_length - 1][nb_segment - 1][i]); + out_file << i + 1 << "\t" << exp(forward[seq_length - 1][nb_segment - 1][i]) / approximated_likelihood << "\t" + << likelihood_cumul / approximated_likelihood << "\t" + << 1. 
/ nb_segmentation_forward[seq_length - 1][nb_segment - 1] << endl; + } + } */ + } +# endif + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] factorial[i][j]; + } + delete [] factorial[i]; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] binomial_coeff[i][j]; + } + delete [] binomial_coeff[i]; + } + + delete [] seq_mean[i]; + delete [] hyperparam[i]; + } + delete [] factorial; + delete [] inf_bound_parameter; + delete [] binomial_coeff; + delete [] seq_mean; + delete [] hyperparam; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] contrast; + + for (i = 0;i < seq_length;i++) { + delete [] nb_segmentation_forward[i]; + } + delete [] nb_segmentation_forward; + + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + delete [] forward[i][j]; + } + delete [] forward[i]; + } + delete [] forward; + + delete [] nb_segmentation; + delete [] rank; + + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + delete [] optimal_length[i][j]; + } + delete [] optimal_length[i]; + } + delete [] optimal_length; + + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + delete [] optimal_rank[i][j]; + } + delete [] optimal_rank[i]; + } + delete [] optimal_rank; + + delete [] change_point; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + 
for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + delete [] mean[i][j]; + delete [] variance[i][j]; + } + } + } + + else { + delete [] mean[i][0]; + delete [] variance[i][0]; + } + + delete [] mean[i]; + delete [] variance[i]; + } + + else if ((model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + delete [] intercept[i][j]; + delete [] slope[i][j]; + delete [] variance[i][j]; + } + } + } + + else { + delete [] intercept[i][0]; + delete [] slope[i][0]; + delete [] variance[i][0]; + } + + delete [] intercept[i]; + delete [] slope[i]; + delete [] variance[i]; + } + + else if ((model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + delete [] mean[i][j]; + delete [] autoregressive_coeff[i][j]; + delete [] variance[i][j]; + } + } + } + + else { + delete [] mean[i][0]; + delete [] autoregressive_coeff[i][0]; + delete [] variance[i][0]; + } + + delete [] mean[i]; + delete [] autoregressive_coeff[i]; + delete [] variance[i]; + } + } + + delete [] mean; + delete [] variance; + delete [] intercept; + delete [] slope; + delete [] autoregressive_coeff; + + for (i = 0;i < seq_length;i++) { + delete [] active_cell[i]; + } + delete [] active_cell; + +# ifdef DEBUG + for (i = 0;i < seq_length;i++) { + delete [] segment_probability[i]; + } + delete [] segment_probability; +# endif + +# ifdef MESSAGE + if (inb_segmentation >= 1000) { + for (i = 0;i < seq_length;i++) { + delete [] smoothed_probability[i]; + } + delete [] smoothed_probability; + } +# endif + + return segmentation_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief 
Computation by maximization of segment or change-point profiles for + * a single sequence or a sample of sequences. + * + * \param[in] index sequence index, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] rank ranks (for ordinal variables), + * \param[in] os stream, + * \param[in] plot_set pointer on a MultiPlotSet object, + * \param[in] output output type, + * \param[in] format output format (ASCII/SPREADSHEET/GNUPLOT/PLOT), + * \param[in] likelihood log-likelihood of the multiple change-point model. + * + * \return log-likelihood of the optimal segmentation. + */ +/*--------------------------------------------------------------*/ + +double Sequences::forward_backward_dynamic_programming(int index , int nb_segment , + segment_model *model_type , bool common_contrast , + double *shape_parameter , double **rank , + ostream *os , MultiPlotSet *plot_set , + change_point_profile output , output_format format , + double likelihood) const + +{ + int i , j , k , m; + int seq_length , count , *inf_bound_parameter , *seq_index_parameter , *change_point , *psegment , + **optimal_length; + double buff , segmentation_likelihood , backward_max , **seq_mean , **hyperparam , **forward , + **backward , **backward_output , **output_piecewise_function , ***piecewise_function , + ***factorial , ***binomial_coeff; + long double *contrast; + + + factorial = new double**[nb_variable]; + inf_bound_parameter = new int[nb_variable]; + binomial_coeff = new double**[nb_variable]; + seq_mean = new double*[nb_variable]; + seq_index_parameter = NULL; + hyperparam = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + + // computation of log of factorials for Poisson models + + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + 
factorial[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + factorial[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + factorial[i][j][k] = log_factorial(int_sequence[j][i][k]); + } + } + else { + factorial[i][j] = NULL; + } + } + } + + else { + factorial[i] = NULL; + } + + // computation of log of binomial coefficients for negative binomial models + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + switch (model_type[i - 1]) { + case NEGATIVE_BINOMIAL_0_CHANGE : + inf_bound_parameter[i - 1] = 0; + break; + case NEGATIVE_BINOMIAL_1_CHANGE : + inf_bound_parameter[i - 1] = 1; + break; + } + + binomial_coeff[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + binomial_coeff[i][j] = new double[length[j]]; + for (k = 0;k < length[j];k++) { + binomial_coeff[i][j][k] = log_binomial_coefficient(inf_bound_parameter[i - 1] , shape_parameter[i - 1] , + int_sequence[j][i][k]); + } + } + else { + binomial_coeff[i][j] = NULL; + } + } + } + + else { + binomial_coeff[i] = NULL; + } + + // computation of sequence means for Gaussian change in the variance models or + // stationary piecewise autoregressive models + + if ((model_type[i - 1] == VARIANCE_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) { + if ((index != I_DEFAULT) || (!common_contrast)) { + seq_mean[i] = new double[nb_sequence]; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += int_sequence[j][i][k]; + } + seq_mean[i][j] /= length[j]; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + seq_mean[i][j] = 0.; + for (k = 0;k < length[j];k++) { + seq_mean[i][j] += real_sequence[j][i][k]; + } + 
seq_mean[i][j] /= length[j]; + } + } + } + } + + else { + seq_mean[i] = new double[1]; + seq_mean[i][0] = 0.; + + if (type[i] != REAL_VALUE) { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += int_sequence[k][i][j]; + } + } + } + + else { + for (j = 0;j < length[0];j++) { + for (k = 0;k < nb_sequence;k++) { + seq_mean[i][0] += real_sequence[k][i][j]; + } + } + } + + seq_mean[i][0] /= (nb_sequence * length[0]); + } + } + + else { + seq_mean[i] = NULL; + } + + if (((i == 1) && (model_type[0] == INTERCEPT_SLOPE_CHANGE)) || + ((model_type[i - 1] == LINEAR_MODEL_CHANGE) && (!seq_index_parameter))) { + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = index_parameter[index == I_DEFAULT ? 0 : index]; + } + } + + // computation of hyperparameters for Bayesian Poisson and Gaussian models + + if (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) { + hyperparam[i] = new double[2]; + gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else if (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE) { + hyperparam[i] = new double[4]; + gaussian_gamma_hyperparameter_computation(index , i , hyperparam[i]); + } + else { + hyperparam[i] = NULL; + } + } + + seq_length = length[index == I_DEFAULT ? 
0 : index]; + contrast = new long double[seq_length]; + + forward = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + forward[i] = new double[nb_segment]; + } + + optimal_length = new int*[seq_length]; + for (i = 0;i < seq_length;i++) { + optimal_length[i] = new int[nb_segment]; + } + + backward = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward[i] = new double[nb_segment]; + } + + backward_output = new double*[seq_length]; + for (i = 0;i < seq_length;i++) { + backward_output[i] = new double[nb_segment]; + } + + piecewise_function = new double**[nb_variable]; + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + piecewise_function[i] = new double*[nb_sequence]; + for (j = 0;j < nb_sequence;j++) { + if ((index == I_DEFAULT) || (index == j)) { + piecewise_function[i][j] = new double[length[j]]; + } + else { + piecewise_function[i][j] = NULL; + } + } + } + + else { + piecewise_function[i] = NULL; + } + } + + // forward recurrence + + for (i = 0;i < seq_length;i++) { + + // computation of segment contrast functions (log-likelihoods or sum of squared deviations) + + forward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + for (j = 0;j < nb_segment;j++) { + forward[i][j] = D_INF; + } + + for (j = MAX(0 , nb_segment + i - seq_length);j < MIN((i < 
seq_length - 1 ? nb_segment - 1 : nb_segment) , i + 1);j++) { + if (j == 0) { + forward[i][j] = contrast[0]; + if (forward[i][j] != D_INF) { + optimal_length[i][j] = i + 1; + } + } + + else { + for (k = i;k >= j;k--) { + if ((contrast[k] != D_INF) && (forward[k - 1][j - 1] != D_INF)) { + buff = contrast[k] + forward[k - 1][j - 1]; + if (buff > forward[i][j]) { + forward[i][j] = buff; + optimal_length[i][j] = i - k + 1; + } + } + } + } + } + } + + if (forward[seq_length - 1][nb_segment - 1] == D_INF) { + segmentation_likelihood = D_INF; + } + + else { + + // restoration + + change_point = new int[nb_segment + 1]; + i = seq_length - 1; + change_point[nb_segment] = seq_length; + psegment = int_sequence[index == I_DEFAULT ? 0 : index][0] + i; + + for (j = nb_segment - 1;j >= 0;j--) { + for (k = i;k > i - optimal_length[i][j];k--) { + *psegment-- = j; + } + i -= optimal_length[i][j]; + change_point[j] = i + 1; + } + + if (index == I_DEFAULT) { + for (i = 1;i < nb_sequence;i++) { + for (j = 0;j < length[0];j++) { + int_sequence[i][0][j] = int_sequence[0][0][j]; + } + } + } + + // backward recurrence + + for (i = seq_length - 1;i >= 0;i--) { + + // computation of segment contrast functions (log-likelihoods or sum of squared deviations) + + backward_contrast(i , index , model_type , common_contrast , factorial , + shape_parameter , binomial_coeff , seq_mean , seq_index_parameter , + hyperparam , rank , contrast); + + for (j = 0;j < nb_segment;j++) { + backward_output[i][j] = D_INF; + } + + for (j = MAX((i == 0 ? 
0 : 1) , nb_segment + i - seq_length);j < MIN(nb_segment , i + 1);j++) { + if (j < nb_segment - 1) { + backward[i][j] = D_INF; + for (k = seq_length + j - nb_segment;k >= i;k--) { + if ((contrast[k] != D_INF) && (backward[k + 1][j + 1] != D_INF)) { + buff = contrast[k] + backward[k + 1][j + 1]; + if (buff > backward[i][j]) { + backward[i][j] = buff; + } + } + + if ((output == SEGMENT) && (k > i) && (backward[i][j] != D_INF)) { + if (i == 0) { + if (backward[i][j] > backward_output[k][j]) { + backward_output[k][j] = backward[i][j]; + } + } + else if (forward[i - 1][j - 1] != D_INF) { + buff = forward[i - 1][j - 1] + backward[i][j]; + if (buff > backward_output[k][j]) { + backward_output[k][j] = buff; + } + } + } + } + } + + else { + backward[i][j] = contrast[seq_length - 1]; + + if ((output == SEGMENT) && (forward[i - 1][j - 1] != D_INF) && + (backward[i][j] != D_INF)) { + buff = forward[i - 1][j - 1] + backward[i][j]; + for (k = seq_length - 1;k > i;k--) { + if (buff > backward_output[k][j]) { + backward_output[k][j] = buff; + } + } + } + } + + if (backward[i][j] != D_INF) { + if (i == 0) { + backward_output[i][j] = backward[i][j]; + } + else if (forward[i - 1][j - 1] != D_INF) { + backward_output[i][j] = forward[i - 1][j - 1] + backward[i][j]; + } + } + } + } + +# ifdef DEBUG + cout << "\n"; + for (i = 1;i < seq_length;i++) { + cout << i; + for (j = 0;j < nb_segment;j++) { + if (j == 0) { + cout << " | " << backward[i][j]; + } + else { + cout << " | " << ((forward[i - 1][j - 1] != D_INF) && (backward[i][j] != D_INF) ? forward[i - 1][j - 1] + backward[i][j] : D_INF); + } + cout << " " << backward_output[i][j]; + } + cout << endl; + } + cout << endl; +# endif + + // restoration + +# ifdef MESSAGE + if (output == SEGMENT) { + int optimal_segment; + + psegment = int_sequence[index == I_DEFAULT ? 
0 : index][0]; + + for (i = 0;i < seq_length;i++) { + backward_max = D_INF; + for (j = 0;j < nb_segment;j++) { + if (backward_output[i][j] > backward_max) { + backward_max = backward_output[i][j]; + optimal_segment = j; + } + } + + if (optimal_segment != *psegment) { + cout << "\nERROR: " << i << " | " << *psegment << " " << optimal_segment << endl; + } + + psegment++; + } + } +# endif + + for (i = 1;i < nb_variable;i++) { + piecewise_linear_function(index , i , nb_segment , model_type[i - 1] , common_contrast , + change_point , seq_index_parameter , piecewise_function[i]); + } + +# ifdef MESSAGE + if ((backward[0][0] < forward[seq_length - 1][nb_segment - 1] - DOUBLE_ERROR) || + (backward[0][0] > forward[seq_length - 1][nb_segment - 1] + DOUBLE_ERROR)) { + cout << "\nERROR: " << backward[0][0] << " | " << forward[seq_length - 1][nb_segment - 1] << endl; + } +/* if ((backward_output[0][0] < backward[0][0] - DOUBLE_ERROR) || + (backward_output[0][0] > backward[0][0] + DOUBLE_ERROR)) { + cout << "\nERROR: " << backward_output[0][0] << " | " << backward[0][0] << endl; + } */ +# endif + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + segmentation_likelihood = forward[seq_length - 1][nb_segment - 1]; + + if (likelihood != D_INF) { + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + if (backward_output[i][j] != D_INF) { + backward_output[i][j] = exp(backward_output[i][j] - likelihood); + } + else { + backward_output[i][j] = 0.; + } + } + } + } + + else if (segmentation_likelihood != D_INF) { + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + if (backward_output[i][j] != D_INF) { + backward_output[i][j] = exp(backward_output[i][j] - segmentation_likelihood); + } + else { + backward_output[i][j] = 0.; + } + } + } + } + } + + else { + if (forward[seq_length - 1][nb_segment - 1] < 0.) { + count = (index == I_DEFAULT ? 
nb_sequence : 1); + + segmentation_likelihood = -((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][nb_segment - 1] / + (count * seq_length)) + log(2 * M_PI) + 1); +/* segmentation_likelihood = -((double)(count * seq_length) / 2.) * + (log(-forward[seq_length - 1][nb_segment - 1] / + (count * (seq_length - nb_segment))) + log(2 * M_PI)) - + (double)(count * (seq_length - nb_segment)) / 2.; */ + + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + if (backward_output[i][j] < 0.) { + backward_output[i][j] = pow(backward_output[i][j] / forward[seq_length - 1][nb_segment - 1] , + -((double)(count * seq_length) / 2.)); +/* backward_output[i][j] = exp(-((double)(count * seq_length) / 2.) * + log(backward_output[i][j] / forward[seq_length - 1][nb_segment - 1])); */ + } + else { + backward_output[i][j] = 0.; + } + } + } + } + + else { + segmentation_likelihood = D_INF; + } + } + + if (segmentation_likelihood == D_INF) { + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + backward_output[i][j] = 0.; + } + } + } + + switch (format) { + + case ASCII : { + if (likelihood != D_INF) { + switch (output) { + case CHANGE_POINT : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\n\n"; + break; + case SEGMENT : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + break; + } + } + + else { + switch (output) { + case CHANGE_POINT : + *os << "\n" << SEQ_label[SEQL_MAX_CHANGE_POINT_LIKELIHOOD] << "\n\n"; + break; + case SEGMENT : + *os << "\n" << SEQ_label[SEQL_MAX_SEGMENT_LIKELIHOOD] << "\n\n"; + break; + } + } + + if ((index != I_DEFAULT) || (common_contrast)) { + output_piecewise_function = new double*[nb_variable]; + + for (i = 1;i < nb_variable;i++) { + if (piecewise_function[i]) { + output_piecewise_function[i] = piecewise_function[i][index == I_DEFAULT ? 
0 : index]; + } + else { + output_piecewise_function[i] = NULL; + } + } + } + + else { + output_piecewise_function = NULL; + } + + profile_ascii_print(*os , index , nb_segment , backward_output , + (output == CHANGE_POINT ? SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) , + output_piecewise_function); + delete [] output_piecewise_function; + + *os << "\n" << SEQ_label[SEQL_SEGMENTATION_LIKELIHOOD] << ": " << segmentation_likelihood; + if (likelihood != D_INF) { + *os << " (" << exp(segmentation_likelihood - likelihood) << ")"; + } + *os << endl; + break; + } + + case SPREADSHEET : { + if (likelihood != D_INF) { + switch (output) { + case CHANGE_POINT : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\n\n"; + break; + case SEGMENT : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_SEGMENT_PROBABILITY] << "\n\n"; + break; + } + } + + else { + switch (output) { + case CHANGE_POINT : + *os << "\n" << SEQ_label[SEQL_MAX_CHANGE_POINT_LIKELIHOOD] << "\n\n"; + break; + case SEGMENT : + *os << "\n" << SEQ_label[SEQL_MAX_SEGMENT_LIKELIHOOD] << "\n\n"; + break; + } + } + + profile_spreadsheet_print(*os , index , nb_segment , backward_output , + (output == CHANGE_POINT ? 
SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) , + common_contrast , piecewise_function); + + *os << "\n" << SEQ_label[SEQL_SEGMENTATION_LIKELIHOOD] << "\t" << segmentation_likelihood; + if (likelihood != D_INF) { + *os << "\t" << exp(segmentation_likelihood - likelihood); + } + *os << endl; + break; + } + + case GNUPLOT : { + profile_plot_print(*os , index , nb_segment , backward_output , common_contrast , piecewise_function); + break; + } + + case PLOT : { + MultiPlotSet &plot = *plot_set; + + i = 0; + for (j = 1;j < nb_variable;j++) { + if ((piecewise_function) && (piecewise_function[j])) { + plot[i].resize(2); + + if ((index != I_DEFAULT) || (!common_contrast)) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + if ((index == I_DEFAULT) || (index == k)) { + for (m = 0;m < length[k];m++) { + plot[i][0].add_point(seq_index_parameter[m] , int_sequence[k][j][m]); + plot[i][1].add_point(seq_index_parameter[m] , piecewise_function[j][k][m]); + } + } + } + } + else { + for (k = 0;k < nb_sequence;k++) { + if ((index == I_DEFAULT) || (index == k)) { + for (m = 0;m < length[k];m++) { + plot[i][0].add_point(seq_index_parameter[m] , real_sequence[k][j][m]); + plot[i][1].add_point(seq_index_parameter[m] , piecewise_function[j][k][m]); + } + } + } + } + } + + else { + if (type[j] != REAL_VALUE) { + for (k = 0;k < nb_sequence;k++) { + for (m = 0;m < length[k];m++) { + plot[i][0].add_point(seq_index_parameter[m] , int_sequence[k][j][m]); + } + } + } + else { + for (k = 0;k < nb_sequence;k++) { + for (m = 0;m < length[k];m++) { + plot[i][0].add_point(seq_index_parameter[m] , real_sequence[k][j][m]); + } + } + } + for (k = 0;k < length[0];k++) { + plot[i][1].add_point(seq_index_parameter[k] , piecewise_function[j][0][k]); + } + } + + i++; + } + } + + profile_plotable_write(plot[i] , index , nb_segment , backward_output); + break; + } + } + +# ifdef MESSAGE + if (format != GNUPLOT) { + double ambiguity = 0.; + + psegment = int_sequence[index == 
I_DEFAULT ? 0 : index][0]; + for (i = 0;i < seq_length;i++) { + for (j = 0;j < nb_segment;j++) { + if (j != *psegment) { + ambiguity += backward_output[i][j]; + } + } + psegment++; + } + + if (likelihood != D_INF) { + ambiguity *= exp(likelihood - segmentation_likelihood); + } + + switch (format) { + case ASCII : + *os << "\n" << SEQ_label[SEQL_AMBIGUITY] << ": " << ambiguity + << " (" << ambiguity / seq_length << ")" << endl; + break; + case SPREADSHEET : + *os << "\n" << SEQ_label[SEQL_AMBIGUITY] << "\t" << ambiguity + << "\t" << ambiguity / seq_length << "\t" << endl; + break; + } + } +# endif + + delete [] change_point; + } + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_POISSON_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] factorial[i][j]; + } + delete [] factorial[i]; + } + + if ((model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] binomial_coeff[i][j]; + } + delete [] binomial_coeff[i]; + } + + delete [] seq_mean[i]; + delete [] hyperparam[i]; + } + delete [] factorial; + delete [] inf_bound_parameter; + delete [] binomial_coeff; + delete [] seq_mean; + delete [] hyperparam; + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + + delete [] contrast; + + for (i = 0;i < seq_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 0;i < seq_length;i++) { + delete [] optimal_length[i]; + } + delete [] optimal_length; + + for (i = 0;i < seq_length;i++) { + delete [] backward[i]; + } + delete [] backward; + + for (i = 0;i < seq_length;i++) { + delete [] backward_output[i]; + } + delete [] backward_output; + + for (i = 1;i < nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + 
(model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + for (j = 0;j < nb_sequence;j++) { + delete [] piecewise_function[i][j]; + } + delete [] piecewise_function[i]; + } + } + delete [] piecewise_function; + + return segmentation_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the N most probable segmentations, of segment/change-point profiles and + * entropy profiles for a single sequence or a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output type, + * \param[in] format output format (ASCII/SPREADSHEET), + * \param[in] segmentation method for computing segmentations (FORWARD_DYNAMIC_PROGRAMMING/ FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_segmentation number of segmentations. + * + * \return error status. 
+ */
+/*--------------------------------------------------------------*/
+
+bool Sequences::segment_profile_write(StatError &error , ostream &os , int iidentifier ,
+                                      int nb_segment , segment_model *model_type ,
+                                      bool common_contrast , double *shape_parameter ,
+                                      change_point_profile output , output_format format ,
+                                      latent_structure_algorithm segmentation , int nb_segmentation) const
+
+{
+  bool status = true;
+  int i , j;
+  int index;
+  double segment_length_max , likelihood = D_INF , segmentation_likelihood , **rank;
+  Sequences *seq;
+
+
+  error.init();
+
+/*  if (((index_param_type == TIME) && (index_interval->variance > 0.)) ||
+      (index_param_type == POSITION)) {
+    status = false;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  }
+  if (index_param_type == POSITION) {
+    status = false;
+    error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]);
+  } */
+
+  // validation of each variable against the segment model requested for it;
+  // every violation is recorded so that all of them are reported at once
+
+  for (i = 0;i < nb_variable;i++) {
+
+    // these models are only accepted for INT_VALUE or STATE variables
+
+    if ((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == POISSON_CHANGE) ||
+        (model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) ||
+        (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == BAYESIAN_POISSON_CHANGE)) {
+      if ((type[i] != INT_VALUE) && (type[i] != STATE)) {
+        status = false;
+        ostringstream error_message , correction_message;
+        error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                      << STAT_error[STATR_VARIABLE_TYPE];
+        correction_message << STAT_variable_word[INT_VALUE] << " or "
+                           << STAT_variable_word[STATE];
+        error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str());
+      }
+
+      else {
+
+        // minimum value: >= 1 for NEGATIVE_BINOMIAL_1_CHANGE, >= 0 for the other models
+
+        if (((model_type[i] != NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 0)) ||
+            ((model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 1))) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                        << STAT_error[STATR_POSITIVE_MIN_VALUE];
+          error.update((error_message.str()).c_str());
+        }
+
+        if (!marginal_distribution[i]) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                        << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION];
+          error.update((error_message.str()).c_str());
+        }
+
+        else if (model_type[i] == CATEGORICAL_CHANGE) {
+
+          // categorical model: between 2 and NB_OUTPUT values, each of them observed
+
+          if ((marginal_distribution[i]->nb_value < 2) ||
+              (marginal_distribution[i]->nb_value > NB_OUTPUT)) {
+            status = false;
+            ostringstream error_message;
+            error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                          << STAT_error[STATR_NB_VALUE];
+            error.update((error_message.str()).c_str());
+          }
+
+          else {
+            for (j = 0;j < marginal_distribution[i]->nb_value;j++) {
+              if (marginal_distribution[i]->frequency[j] == 0) {
+                status = false;
+                ostringstream error_message;
+                error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                              << STAT_error[STATR_MISSING_VALUE] << " " << j;
+                error.update((error_message.str()).c_str());
+              }
+            }
+          }
+        }
+      }
+    }
+
+    else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) {
+      status = false;
+      ostringstream error_message , correction_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      correction_message << STAT_variable_word[INT_VALUE] << " or "
+                         << STAT_variable_word[STATE] << " or "
+                         << STAT_variable_word[REAL_VALUE];
+      error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str());
+    }
+
+    // autoregressive models are rejected when the explicit index parameter has
+    // a non-null interval variance
+
+    else if (((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) &&
+             (index_param_type != IMPLICIT_TYPE) && (index_interval->variance > 0.)) {
+      status = false;
+      error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+    }
+  }
+
+  // selection of the sequence: index is only assigned when the identifier is found,
+  // in which case status is still usable below
+
+  if (iidentifier != I_DEFAULT) {
+    for (i = 0;i < nb_sequence;i++) {
+      if (iidentifier == identifier[i]) {
+        index = i;
+        break;
+      }
+    }
+
+    if (i == nb_sequence) {
+      status = false;
+      error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]);
+    }
+  }
+
+  else {
+
+    // whole sample: all the sequences shall have the same length
+
+    index = I_DEFAULT;
+    if (length_distribution->variance > 0.) {
+      status = false;
+      error.update(SEQ_error[SEQR_VARIABLE_SEQUENCE_LENGTH]);
+    }
+  }
+
+  if (status) {
+    if ((nb_segment < 2) || (nb_segment > length[index == I_DEFAULT ? 0 : index] / 2)) {
+      status = false;
+      error.update(SEQ_error[SEQR_NB_SEGMENT]);
+    }
+
+    if (nb_segmentation < 2) {
+      status = false;
+      error.update(SEQ_error[SEQR_NB_SEGMENTATION]);
+    }
+  }
+
+  if (status) {
+
+    // working copy built with ADD_STATE_VARIABLE: variable i of the copy is paired
+    // with model_type[i - 1] below
+
+    seq = new Sequences(*this , ADD_STATE_VARIABLE);
+
+    // rank computation for ordinal variables
+
+    rank = new double*[seq->nb_variable];
+
+    for (i = 1;i < seq->nb_variable;i++) {
+      if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) {
+        rank[i] = seq->marginal_distribution[i]->rank_computation();
+      }
+      else {
+        rank[i] = NULL;
+      }
+    }
+
+    // the forward-backward pass is skipped for the MEAN_CHANGE and INTERCEPT_SLOPE_CHANGE
+    // models: likelihood then keeps its D_INF initial value
+
+    if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) {
+      likelihood = seq->forward_backward(index , nb_segment , model_type , common_contrast ,
+                                         shape_parameter , rank , &os , NULL ,
+                                         segment_length_max , output , format);
+    }
+    segmentation_likelihood = seq->forward_backward_dynamic_programming(index , nb_segment , model_type ,
+                                                                        common_contrast , shape_parameter ,
+                                                                        rank , &os , NULL , output , format ,
+                                                                        likelihood);
+    if (segmentation_likelihood == D_INF) {
+      status = false;
+      error.update(SEQ_error[SEQR_SEGMENTATION_FAILURE]);
+    }
+
+    // the N segmentations are always written at the ASCII format, and at the other
+    // formats only for sequences of length <= 400
+
+    else if ((format == ASCII) || (length[index == I_DEFAULT ? 0 : index] <= 400)) {
+      switch (segmentation) {
+      case FORWARD_DYNAMIC_PROGRAMMING :
+        seq->N_segmentation(index , nb_segment , model_type , common_contrast , shape_parameter ,
+                            rank , os , format , nb_segmentation , likelihood);
+        break;
+      case FORWARD_BACKWARD_SAMPLING :
+        seq->forward_backward_sampling(index , nb_segment , model_type , common_contrast ,
+                                       shape_parameter , rank , os , format , nb_segmentation);
+        break;
+      }
+    }
+
+    // the rank arrays are released before the working copy since the loop bound
+    // is read from seq (reading seq->nb_variable after delete seq is undefined behavior)
+
+    for (i = 1;i < seq->nb_variable;i++) {
+      delete [] rank[i];
+    }
+    delete [] rank;
+
+    delete seq;
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the N most probable segmentations, of segment/change-point profiles and
+ *         entropy profiles for a single sequence or a sample of sequences and displaying the results.
+ *
+ *  \param[in] error           reference on a StatError object,
+ *  \param[in] iidentifier     sequence identifier,
+ *  \param[in] nb_segment      number of segments,
+ *  \param[in] model_type      segment model types,
+ *  \param[in] common_contrast flag contrast functions common to the individuals,
+ *  \param[in] shape_parameter negative binomial shape parameters,
+ *  \param[in] output          output type,
+ *  \param[in] segmentation    method for computing segmentations (FORWARD_DYNAMIC_PROGRAMMING/ FORWARD_BACKWARD_SAMPLING),
+ *  \param[in] nb_segmentation number of segmentations.
+ *
+ *  \return                    error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool Sequences::segment_profile_ascii_write(StatError &error , int iidentifier ,
+                                            int nb_segment , vector<segment_model> &model_type ,
+                                            bool common_contrast , vector<double> &shape_parameter ,
+                                            change_point_profile output ,
+                                            latent_structure_algorithm segmentation , int nb_segmentation) const
+
+{
+  // thin wrapper: the vectors are handed over as raw arrays to the stream-based
+  // overload, with cout as stream and ASCII as format
+
+  return segment_profile_write(error , cout , iidentifier , nb_segment , model_type.data() ,
+                               common_contrast , shape_parameter.data() , output ,
+                               ASCII , segmentation , nb_segmentation);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the N most probable segmentations, of segment/change-point profiles and
+ *         entropy profiles for a single sequence or a sample of sequences and
+ *         writing of the results in a file.
+ *
+ *  \param[in] error           reference on a StatError object,
+ *  \param[in] path            file path,
+ *  \param[in] iidentifier     sequence identifier,
+ *  \param[in] nb_segment      number of segments,
+ *  \param[in] model_type      segment model types,
+ *  \param[in] common_contrast flag contrast functions common to the individuals,
+ *  \param[in] shape_parameter negative binomial shape parameters,
+ *  \param[in] output          output type,
+ *  \param[in] format          file format (ASCII/SPREADSHEET),
+ *  \param[in] segmentation    method for computing segmentations (FORWARD_DYNAMIC_PROGRAMMING/FORWARD_BACKWARD_SAMPLING),
+ *  \param[in] nb_segmentation number of segmentations.
+ *
+ *  \return                    error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool Sequences::segment_profile_write(StatError &error , const string path , int iidentifier ,
+                                      int nb_segment , vector<segment_model> &model_type ,
+                                      bool common_contrast , vector<double> &shape_parameter ,
+                                      change_point_profile output , output_format format ,
+                                      latent_structure_algorithm segmentation , int nb_segmentation) const
+
+{
+  bool status = true;
+  ofstream out_file(path.c_str());
+
+
+  error.init();
+
+  // the stream state, not the path, tells whether the file could be opened
+
+  if (!out_file) {
+    status = false;
+    error.update(STAT_error[STATR_FILE_NAME]);
+  }
+
+  else {
+
+    // delegation to the stream-based overload, the vectors being handed over as raw arrays
+
+    status = segment_profile_write(error , out_file , iidentifier , nb_segment , model_type.data() ,
+                                   common_contrast , shape_parameter.data() , output , format ,
+                                   segmentation , nb_segmentation);
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of segment/change-point profiles and of entropy profiles for
+ *         a single sequence or a sample of sequences and plot of the profiles
+ *         at the Gnuplot format.
+ *
+ *  \param[in] error           reference on a StatError object,
+ *  \param[in] prefix          file prefix,
+ *  \param[in] iidentifier     sequence identifier,
+ *  \param[in] nb_segment      number of segments,
+ *  \param[in] model_type      segment model types,
+ *  \param[in] common_contrast flag contrast functions common to the individuals,
+ *  \param[in] shape_parameter negative binomial shape parameters,
+ *  \param[in] output          output type,
+ *  \param[in] title           figure title.
+ *
+ *  \return                    error status.
+ */ +/*--------------------------------------------------------------*/ + +bool Sequences::segment_profile_plot_write(StatError &error , const char *prefix , int iidentifier , + int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + change_point_profile output , const char *title) const + +{ + bool status = true; + int i , j , k , m; + int index , seq_length , *seq_index_parameter; + double segment_length_max , likelihood = D_INF , segmentation_likelihood , **rank; + Sequences *seq; + ostringstream data_file_name[2]; + ofstream *out_data_file; + + + error.init(); + +/* if (((index_param_type == TIME) && (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + if (index_param_type == POSITION) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]); + } */ + + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == POISSON_CHANGE) || + (model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == BAYESIAN_POISSON_CHANGE)) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (((model_type[i] != NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 0)) || + ((model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 1))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << 
STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else if (model_type[i] == CATEGORICAL_CHANGE) { + if ((marginal_distribution[i]->nb_value < 2) || + (marginal_distribution[i]->nb_value > NB_OUTPUT)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (index_param_type != IMPLICIT_TYPE) && (index_interval->variance > 0.)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + } + + if (iidentifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (iidentifier == identifier[i]) { + index = i; + break; + } 
+ } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + } + + else { + index = I_DEFAULT; + if (length_distribution->variance > 0.) { + status = false; + error.update(SEQ_error[SEQR_VARIABLE_SEQUENCE_LENGTH]); + } + } + + if ((status) && ((nb_segment < 2) || (nb_segment > length[index == I_DEFAULT ? 0 : index] / 2))) { + status = false; + error.update(SEQ_error[SEQR_NB_SEGMENT]); + } + + if (status) { + + // writing of the data files + + i = (((model_type[0] == MEAN_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE)) ? 0 : 1); + data_file_name[i] << prefix << i << ".dat"; + out_data_file = new ofstream((data_file_name[i].str()).c_str()); + + if (!out_data_file) { + status = false; + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + else { + seq = new Sequences(*this , ADD_STATE_VARIABLE); + + // rank computation for ordinal variables + + rank = new double*[seq->nb_variable]; + + for (i = 1;i < seq->nb_variable;i++) { + if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + rank[i] = seq->marginal_distribution[i]->rank_computation(); + } + else { + rank[i] = NULL; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + likelihood = seq->forward_backward(index , nb_segment , model_type , common_contrast , + shape_parameter , rank , out_data_file , NULL , + segment_length_max , output , GNUPLOT); + out_data_file->close(); + delete out_data_file; + + data_file_name[0] << prefix << 0 << ".dat"; + out_data_file = new ofstream((data_file_name[0].str()).c_str()); + } + +# ifdef DEBUG + likelihood = D_INF; +# endif + + segmentation_likelihood = seq->forward_backward_dynamic_programming(index , nb_segment , model_type , + common_contrast , shape_parameter , + rank , out_data_file , NULL , + output , GNUPLOT , likelihood); + out_data_file->close(); + delete out_data_file; + + if (segmentation_likelihood == D_INF) { + status = false; + 
error.update(SEQ_error[SEQR_SEGMENTATION_FAILURE]); + } + + else { + seq_length = seq->length[index == I_DEFAULT ? 0 : index]; + + if (index_param_type == IMPLICIT_TYPE) { + seq_index_parameter = new int[seq_length]; + for (j = 0;j < seq_length;j++) { + seq_index_parameter[j] = j; + } + } + else { + seq_index_parameter = seq->index_parameter[index == I_DEFAULT ? 0 : index]; + } + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n"; + +// if (index_parameter) { + if (seq_index_parameter[seq_length - 1] - seq_index_parameter[0] < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + j = 2; + for (k = 1;k < seq->nb_variable;k++) { + if ((model_type[k - 1] == POISSON_CHANGE) || (model_type[k - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[k - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[k - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[k - 1] == VARIANCE_CHANGE) || + (model_type[k - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE) || + (model_type[k - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[k - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[k - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[k - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + out_file << "set title \""; + if (title) { + out_file << title; + if (seq->nb_variable > 2) { + out_file << " - "; + } + } + + if (seq->nb_variable > 2) { + out_file << STAT_label[STATL_VARIABLE] << " " << k; + } + out_file << "\n\n"; + + out_file << "plot [" << 
seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1] << "] [" + << MIN(seq->min_value[k] , 0) << ":" + << MAX(seq->max_value[k] , seq->min_value[k] + 1) << "] "; + + for (m = 0;m < nb_sequence;m++) { + if ((index == I_DEFAULT) || (index == m)) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << 1 << " : " << j++; + + if (index == m) { + out_file << " title \"" << SEQ_label[SEQL_SEQUENCE] << " " << iidentifier << "\""; + if ((index_interval) && (index_interval->variance > index_interval->mean)) { + out_file << " with points,\\" << endl; + } + else { + out_file << " with linespoints,\\" << endl; + } + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << 1 << " : " << j++ + << " title \"" << SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION] << "\" with lines" << endl; + } + + else { + out_file << " notitle with points,\\" << endl; + if (!common_contrast) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << 1 << " : " << j++ + << " notitle with lines"; + if (m < nb_sequence - 1) { + out_file << ",\\"; + } + out_file << endl; + } + } + } + } + + if ((index == I_DEFAULT) && (common_contrast)) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << 1 << " : " << j++ + << " notitle with lines" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + } + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + + if (likelihood != D_INF) { + switch (output) { + case CHANGE_POINT : + out_file << SEQ_label[SEQL_MAX_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\"\n\n"; + break; + case SEGMENT : + out_file << SEQ_label[SEQL_MAX_POSTERIOR_SEGMENT_PROBABILITY] << "\"\n\n"; + break; + } + } + + else { + switch (output) { + case CHANGE_POINT : + out_file << SEQ_label[SEQL_MAX_CHANGE_POINT_LIKELIHOOD] << "\"\n\n"; + break; + case SEGMENT : + out_file << 
SEQ_label[SEQL_MAX_SEGMENT_LIKELIHOOD] << "\"\n\n"; + break; + } + } + + out_file << "plot [" << seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1]; + if (likelihood != D_INF) { + out_file << "] [0:" << exp(segmentation_likelihood - likelihood) << "] "; + } + else { + out_file << "] [0:1] "; + } + for (k = 0;k < nb_segment;k++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << j++ << " title \"" + << (output == CHANGE_POINT ? SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) + << " " << k << "\" with linespoints"; + if (k < nb_segment - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (likelihood != D_INF) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + switch (output) { + case CHANGE_POINT : + out_file << SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\"\n\n"; + break; + case SEGMENT : + out_file << SEQ_label[SEQL_POSTERIOR_SEGMENT_PROBABILITY] << "\"\n\n"; + break; + } + + out_file << "plot [" << seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1] << "] [0:1] "; + for (k = 0;k < nb_segment;k++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << k + 2 << " title \"" + << (output == CHANGE_POINT ? 
SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) + << " " << k << "\" with linespoints"; + if (k < nb_segment - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY] << "\"\n\n"; + + out_file << "plot [" << seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1] << "] [0:1] "; + for (k = MAX(1 , nb_segment - 3);k < nb_segment;k++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << nb_segment + k + 1 << " title \"" << k + 1 << " " + << SEQ_label[SEQL_SEGMENTS] << "\" with linespoints"; + if (k < nb_segment - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << SEQ_label[SEQL_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTIONS] << "\"\n\n"; + + out_file << "plot [" << 0 << ":" << seq_length - nb_segment + 1 << "] [0:" << segment_length_max << "] "; + for (k = 0;k < nb_segment;k++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 2 * nb_segment + k + 1 << " title \"" + << SEQ_label[SEQL_SEGMENT] << " " << k << "\" with linespoints" << ",\\" << endl; + } + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 3 * nb_segment + 1 << " title \"" + << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH] << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << 
SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY] << "\"\n\n"; + + out_file << "plot [" << seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1] + << "] [0:" << log(2.) << "] "; + for (k = MAX(1 , nb_segment - 3);k < nb_segment;k++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << 3 * nb_segment + k + 1 << " title \"" << k + 1 << " " + << SEQ_label[SEQL_SEGMENTS] << "\" with linespoints"; + if (k < nb_segment - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << SEQ_label[SEQL_END_CONDITIONAL_ENTROPY] << "\"\n\n"; + + out_file << "plot [" << seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1] + << "] [0:" << log(2.) << "] "; + for (k = MAX(1 , nb_segment - 3);k < nb_segment;k++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << 4 * nb_segment + k << " title \"" << k + 1 << " " + << SEQ_label[SEQL_SEGMENTS] << "\" with linespoints"; + if (k < nb_segment - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [" << seq_index_parameter[0] << ":" + << seq_index_parameter[seq_length - 1] + << "] [0:" << log(2.) 
<< "] " + << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << 4 * nb_segment << " title \"" + << SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY] << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << 5 * nb_segment - 1 << " title \"" + << SEQ_label[SEQL_END_CONDITIONAL_ENTROPY] << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << 5 * nb_segment << " title \"" + << SEQ_label[SEQL_CHANGE_POINT_ENTROPY] << "\" with linespoints" << endl; + } + + if (seq_index_parameter[seq_length - 1] - seq_index_parameter[0] < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + if (index_param_type == IMPLICIT_TYPE) { + delete [] seq_index_parameter; + } + } + + delete seq; + + for (i = 1;i < seq->nb_variable;i++) { + delete [] rank[i]; + } + delete [] rank; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of segment/change-point profiles and of entropy profiles for + * a single sequence or a sample of sequences and plot of the profiles. + * + * \param[in] error reference on a StatError object, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output type. + * + * \return MultiPlotSet object. 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* Sequences::segment_profile_plotable_write(StatError &error , int iidentifier , + int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + change_point_profile output) const + +{ + bool status = true; + int i , j , k; + int index , nb_plot_set , segmentation_index , seq_length; + double segment_length_max , likelihood = D_INF , segmentation_likelihood , **rank; + Sequences *seq; + ostringstream title , legend; + MultiPlotSet *plot_set; + + + plot_set = NULL; + error.init(); + +/* if (((index_param_type == TIME) && (index_interval->variance > 0.)) || + (index_param_type == POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + if (index_param_type == POSITION) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]); + } */ + + for (i = 0;i < nb_variable;i++) { + if ((model_type[i] == CATEGORICAL_CHANGE) || (model_type[i] == POISSON_CHANGE) || + (model_type[i] == NEGATIVE_BINOMIAL_0_CHANGE) || (model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) || + (model_type[i] == ORDINAL_GAUSSIAN_CHANGE) || (model_type[i] == BAYESIAN_POISSON_CHANGE)) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (((model_type[i] != NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 0)) || + ((model_type[i] == NEGATIVE_BINOMIAL_1_CHANGE) && (min_value[i] < 1))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << 
STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else if (model_type[i] == CATEGORICAL_CHANGE) { + if ((marginal_distribution[i]->nb_value < 2) || + (marginal_distribution[i]->nb_value > NB_OUTPUT)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + else if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (((model_type[i] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE)) && + (index_param_type != IMPLICIT_TYPE) && (index_interval->variance > 0.)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + } + + if (iidentifier != I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (iidentifier == identifier[i]) { + index = i; + break; + } 
+ } + + if (i == nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + } + + else { + index = I_DEFAULT; + if (length_distribution->variance > 0.) { + status = false; + error.update(SEQ_error[SEQR_VARIABLE_SEQUENCE_LENGTH]); + } + } + + if ((status) && ((nb_segment < 2) || (nb_segment > length[index == I_DEFAULT ? 0 : index] / 2))) { + status = false; + error.update(SEQ_error[SEQR_NB_SEGMENT]); + } + + if (status) { + seq = new Sequences(*this , ADD_STATE_VARIABLE); + + // computation of the number of plots + + nb_plot_set = 1; + for (i = 1;i < seq->nb_variable;i++) { + if ((model_type[i - 1] == POISSON_CHANGE) || (model_type[i - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[i - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[i - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[i - 1] == VARIANCE_CHANGE) || + (model_type[i - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE) || + (model_type[i - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[i - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[i - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[i - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + nb_plot_set++; + } + } + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + nb_plot_set += 6; + } + + plot_set = new MultiPlotSet(nb_plot_set); + + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + // rank computation for ordinal variables + + rank = new double*[seq->nb_variable]; + + for (i = 1;i < seq->nb_variable;i++) { + if (model_type[i - 1] == ORDINAL_GAUSSIAN_CHANGE) { + rank[i] = seq->marginal_distribution[i]->rank_computation(); + } + else { + rank[i] = NULL; + } + } + + if ((model_type[0] != MEAN_CHANGE) && (model_type[0] != INTERCEPT_SLOPE_CHANGE)) { + likelihood = seq->forward_backward(index , nb_segment , model_type ,common_contrast , + shape_parameter , rank , NULL , plot_set , + segment_length_max , output , PLOT); + } 
+ +# ifdef DEBUG + likelihood = D_INF; +# endif + + segmentation_likelihood = seq->forward_backward_dynamic_programming(index , nb_segment , model_type , + common_contrast , shape_parameter , + rank , NULL , plot_set , + output , PLOT , likelihood); + + if (segmentation_likelihood == D_INF) { + delete plot_set; + plot_set = NULL; + error.update(SEQ_error[SEQR_SEGMENTATION_FAILURE]); + } + + else { + segmentation_index = (index == I_DEFAULT ? 0 : index); + seq_length = seq->length[segmentation_index]; + + i = 0; + + // scatterplot of sequences and piecewise linear fonction + + for (j = 1;j < seq->nb_variable;j++) { + if ((model_type[j - 1] == POISSON_CHANGE) || (model_type[j - 1] == NEGATIVE_BINOMIAL_0_CHANGE) || + (model_type[j - 1] == NEGATIVE_BINOMIAL_1_CHANGE) || (model_type[j - 1] == GAUSSIAN_CHANGE) || + (model_type[0] == MEAN_CHANGE) || (model_type[j - 1] == VARIANCE_CHANGE) || + (model_type[j - 1] == LINEAR_MODEL_CHANGE) || (model_type[0] == INTERCEPT_SLOPE_CHANGE) || + (model_type[j - 1] == AUTOREGRESSIVE_MODEL_CHANGE) || (model_type[j - 1] == STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE) || + (model_type[j - 1] == BAYESIAN_POISSON_CHANGE) || (model_type[j - 1] == BAYESIAN_GAUSSIAN_CHANGE)) { + if (seq->nb_variable > 2) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << j; + plot[i].title = title.str(); + } + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + plot[i].yrange = Range(MIN(seq->min_value[j] , 0) , MAX(seq->max_value[j] , seq->min_value[j] + 1)); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE] << " " << iidentifier; + plot[i][0].legend = legend.str(); + + if ((index == I_DEFAULT) || 
((index_interval) && (index_interval->variance > index_interval->mean))) { + plot[i][0].style = "points"; + } + else { + plot[i][0].style = "linespoints"; + } + + plot[i][1].legend = SEQ_label[SEQL_PIECEWISE_LINEAR_FUNCTION]; + plot[i][1].style = "lines"; + i++; + } + } + + // maximum posterior probabilities + + if (likelihood != D_INF) { + switch (output) { + case CHANGE_POINT : + plot[i].title = SEQ_label[SEQL_MAX_POSTERIOR_CHANGE_POINT_PROBABILITY]; + break; + case SEGMENT : + plot[i].title = SEQ_label[SEQL_MAX_POSTERIOR_SEGMENT_PROBABILITY]; + break; + } + + plot[i].yrange = Range(0. , exp(segmentation_likelihood - likelihood)); + } + + else { + switch (output) { + case CHANGE_POINT : + plot[i].title = SEQ_label[SEQL_MAX_CHANGE_POINT_LIKELIHOOD]; + break; + case SEGMENT : + plot[i].title = SEQ_label[SEQL_MAX_SEGMENT_LIKELIHOOD]; + break; + } + + plot[i].yrange = Range(0. , 1.); + } + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + for (j = 0;j < nb_segment;j++) { + legend.str(""); + legend << (output == CHANGE_POINT ? 
SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) + << " " << j; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + } + i++; + + // smoothed probabilities + + if (likelihood != D_INF) { + switch (output) { + case CHANGE_POINT : + plot[i].title = SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY]; + break; + case SEGMENT : + plot[i].title = SEQ_label[SEQL_POSTERIOR_SEGMENT_PROBABILITY]; + break; + } + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + plot[i].yrange = Range(0. , 1.); + + for (j = 0;j < nb_segment;j++) { + legend.str(""); + legend << (output == CHANGE_POINT ? SEQ_label[SEQL_CHANGE_POINT] : SEQ_label[SEQL_SEGMENT]) + << " " << j; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + } + i++; + + // change-point profiles + + plot[i].title = SEQ_label[SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY]; + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + plot[i].yrange = Range(0. 
, 1.); + + j = 0; + for (k = MAX(1 , nb_segment - 3);k < nb_segment;k++) { + legend.str(""); + legend << k + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + j++; + } + i++; + + // segment length distributions + + title.str(""); + title << SEQ_label[SEQL_SEGMENT_LENGTH] << " " << STAT_label[STATL_DISTRIBUTIONS]; + plot[i].title = title.str(); + + for (j = 0;j < nb_segment;j++) { + legend << SEQ_label[SEQL_SEGMENT] << " " << i; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + } + + legend << SEQ_label[SEQL_PRIOR_SEGMENT_LENGTH]; + plot[i][nb_segment].legend = legend.str(); + + plot[i][nb_segment].style = "linespoints"; + i++; + + // profiles of entropies conditional on the past + + plot[i].title = SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY]; + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + plot[i].yrange = Range(0. 
, log(2.)); + + j = 0; + for (k = MAX(1 , nb_segment - 3);k < nb_segment;k++) { + legend.str(""); + legend << k + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + j++; + } + i++; + + // profiles of entropies conditional on the future + + plot[i].title = SEQ_label[SEQL_END_CONDITIONAL_ENTROPY]; + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + plot[i].yrange = Range(0. , log(2.)); + + j = 0; + for (k = MAX(1 , nb_segment - 3);k < nb_segment;k++) { + legend.str(""); + legend << k + 1 << " " << SEQ_label[SEQL_SEGMENTS]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + j++; + } + i++; + + // entropy profiles + + if (seq->index_parameter) { + plot[i].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq_length - 1]); + if (seq->index_parameter[index][seq_length - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + else { + plot[i].xrange = Range(0 , seq_length - 1); + if (seq_length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + } + + plot[i].yrange = Range(0. 
, log(2.)); + + plot[i][0].legend = SEQ_label[SEQL_BEGIN_CONDITIONAL_ENTROPY]; + plot[i][0].style = "linespoints"; + + plot[i][1].legend = SEQ_label[SEQL_END_CONDITIONAL_ENTROPY]; + plot[i][1].style = "linespoints"; + + plot[i][2].legend = SEQ_label[SEQL_CHANGE_POINT_ENTROPY]; + plot[i][2].style = "linespoints"; + } + } + + delete seq; + + for (i = 1;i < seq->nb_variable;i++) { + delete [] rank[i]; + } + delete [] rank; + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of segment/change-point profiles and of entropy profiles for + * a single sequence or a sample of sequences and plot of the profiles. + * + * \param[in] error reference on a StatError object, + * \param[in] iidentifier sequence identifier, + * \param[in] nb_segment number of segments, + * \param[in] model_type segment model types, + * \param[in] common_contrast flag contrast functions common to the individuals, + * \param[in] shape_parameter negative binomial shape parameters, + * \param[in] output output type. + * + * \return MultiPlotSet object. 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* Sequences::segment_profile_plotable_write(StatError &error , int iidentifier , + int nb_segment , vector &model_type , + bool common_contrast , vector &shape_parameter , + change_point_profile output) const + +{ + return segment_profile_plotable_write(error , iidentifier , nb_segment , model_type.data() , + common_contrast , shape_parameter.data() , output); +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/continuous_parametric_sequence_estimation.hpp b/src/cpp/sequence_analysis/continuous_parametric_sequence_estimation.hpp new file mode 100644 index 0000000..733be14 --- /dev/null +++ b/src/cpp/sequence_analysis/continuous_parametric_sequence_estimation.hpp @@ -0,0 +1,1461 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: continuous_distribution_sequence_estimation.hpp 12646 2012-08-03 08:12:47Z guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef CONTINUOUS_PARAMETRIC_SEQUENCE_ESTIMATION_HPP +#define CONTINUOUS_PARAMETRIC_SEQUENCE_ESTIMATION_HPP + + +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "sequences.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + +// extern double von_mises_concentration_computation(double mean_direction); + + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of gamma observation distributions. + * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object, + * \param[in] iter EM iteration. 
+ */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::gamma_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process , int iter) const + +{ + int i , j , k; + double buff , diff , log_geometric_mean , *zero_mass , *mean; + long double *variance; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + zero_mass = new double[process->nb_state]; + mean = new double[process->nb_state]; + variance = new long double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + zero_mass[i] = 0.; + mean[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if (int_sequence[i][variable][j] == 0) { + zero_mass[k] += state_sequence_count[i][j][k]; + } + else { + mean[k] += state_sequence_count[i][j][k] * int_sequence[i][variable][j]; + } + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if (real_sequence[i][variable][j] == 0.) 
{ + zero_mass[k] += state_sequence_count[i][j][k]; + } + else { + mean[k] += state_sequence_count[i][j][k] * real_sequence[i][variable][j]; + } + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean[i] /= state_frequency[i]; + } + variance[i] = 0.; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { +// if (state_frequency[i] > 0) { +// variance[i] /= state_frequency[i]; + if (state_frequency[i] > 1) { + variance[i] /= (state_frequency[i] - 1); + } + } + + for (i = 0;i < process->nb_state;i++) { + +# ifdef DEBUG + if ((iter >= 5) && (state_frequency[i] > 0)) { + cout << "\n" << STAT_word[STATW_STATE] << " " << i << " : " + << zero_mass[i] << ", " << state_frequency[i] << " | " + << zero_mass[i] / state_frequency[i] << endl; + } +# endif + +// if (state_frequency[i] > 0) { + if (state_frequency[i] > 1) { + if (zero_mass[i] / state_frequency[i] > GAMMA_ZERO_FREQUENCY_THRESHOLD) { + process->observation[i]->shape = 0; + process->observation[i]->scale = D_DEFAULT; + } + + else { + if (variance[i] > 0.) 
{ +/* if (sqrtl(variance[i]) < mean[i] * GAMMA_VARIATION_COEFF_THRESHOLD) { + variance[i] = mean[i] * mean[i] * GAMMA_VARIATION_COEFF_THRESHOLD * GAMMA_VARIATION_COEFF_THRESHOLD; + } + process->observation[i]->shape = mean[i] * mean[i] / variance[i]; + process->observation[i]->scale = variance[i] / mean[i]; */ + + // Hawang & Huang (2012), Ann. Inst. Statist. Math. 54(4), 840-847 + + buff = mean[i] * mean[i] / variance[i]; + if (buff > GAMMA_INVERSE_SAMPLE_SIZE_FACTOR / (double)state_frequency[i]) { + process->observation[i]->shape = buff - 1. / (double)state_frequency[i]; + } + else { + process->observation[i]->shape = buff; + } +/* if (process->observation[i]->shape < GAMMA_MIN_SHAPE_PARAMETER) { + process->observation[i]->shape = GAMMA_MIN_SHAPE_PARAMETER; + } */ + process->observation[i]->scale = mean[i] / process->observation[i]->shape; + + if ((process->observation[i]->shape >= GAMMA_SHAPE_PARAMETER_THRESHOLD) && + (state_frequency[i] < GAMMA_FREQUENCY_THRESHOLD)) { + log_geometric_mean = 0.; + + switch (type[variable]) { + + case INT_VALUE : { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + if (int_sequence[j][variable][k] > 0) { + log_geometric_mean += state_sequence_count[j][k][i] * log(int_sequence[j][variable][k]); + } + } + } + break; + } + + case REAL_VALUE : { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + if (real_sequence[j][variable][k] > 0.) 
{ + log_geometric_mean += state_sequence_count[j][k][i] * log(real_sequence[j][variable][k]); + } + } + } + break; + } + } + + log_geometric_mean /= (state_frequency[i] - zero_mass[i]); +/* j = 0; to be reworked + +# ifdef DEBUG + cout << "\n" << STAT_word[STATW_STATE] << " " << i << " " + << STAT_word[STATW_SHAPE] << " : " << process->observation[i]->shape << " " + << STAT_word[STATW_SCALE] << " : " << process->observation[i]->scale << endl; +# endif + + do { + process->observation[i]->scale = exp(log_geometric_mean - digamma(process->observation[i]->shape)); + process->observation[i]->shape = mean[i] / process->observation[i]->scale; + j++; + +# ifdef DEBUG + cout << STAT_word[STATW_SHAPE] << " : " << process->observation[i]->shape << " " + << STAT_word[STATW_SCALE] << " : " << process->observation[i]->scale << endl; +# endif + + } + while (j < MIN(GAMMA_ITERATION_FACTOR * iter , GAMMA_MAX_NB_ITERATION)); */ + + // approximations Johnson, Kotz & Balakrishnan, Continuous Univariate Distributions, vol. 1, 2nd ed., pp. 361-362 + +// process->observation[i]->shape = mean[i] / (2 * (mean[i] - exp(log_geometric_mean))) - 1./12.; + diff = log(mean[i]) - log_geometric_mean; + process->observation[i]->shape = (1 + sqrt(1 + 4 * diff / 3)) / (4 * diff); + process->observation[i]->scale = mean[i] / process->observation[i]->shape; + } + } + + else { + process->observation[i]->shape = GAMMA_MIN_SHAPE_PARAMETER; + process->observation[i]->scale = GAMMA_DEFAULT_SCALE_PARAMETER; + } + } + } + + else { + process->observation[i]->shape = D_DEFAULT; + process->observation[i]->scale = D_DEFAULT; + } + } + + delete [] state_frequency; + delete [] zero_mass; + delete [] mean; + delete [] variance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of zero-inflated gamma observation distributions. 
+ * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object, + * \param[in] iter EM iteration. + */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::zero_inflated_gamma_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process , int iter) const + +{ + int i , j , k; + double buff , diff , log_geometric_mean , *zero_mass , *mean; + long double *variance; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + zero_mass = new double[process->nb_state]; + mean = new double[process->nb_state]; + variance = new long double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + zero_mass[i] = 0.; + mean[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if (int_sequence[i][variable][j] == 0) { + zero_mass[k] += state_sequence_count[i][j][k]; + } + else { + mean[k] += state_sequence_count[i][j][k] * int_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if (real_sequence[i][variable][j] == 0.) 
{ + zero_mass[k] += state_sequence_count[i][j][k]; + } + else { + mean[k] += state_sequence_count[i][j][k] * real_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean[i] /= state_frequency[i]; + } + variance[i] = 0.; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if (int_sequence[i][variable][j] > 0) { + diff = int_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if (real_sequence[i][variable][j] > 0.) { + diff = real_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { +// if (state_frequency[i] > 0) { +// variance[i] /= state_frequency[i]; + if (state_frequency[i] > 1) { + variance[i] /= (state_frequency[i] - 1); + } + } + + for (i = 0;i < process->nb_state;i++) { + if (zero_mass[i] + state_frequency[i] > 0) { + if (zero_mass[i] / (zero_mass[i] + state_frequency[i]) > GAMMA_ZERO_FREQUENCY_THRESHOLD) { + process->observation[i]->zero_probability = 1.; + process->observation[i]->shape = D_DEFAULT; + process->observation[i]->scale = D_DEFAULT; + } + + else { + process->observation[i]->zero_probability = zero_mass[i] / (zero_mass[i] + state_frequency[i]); + + if ((variance[i] > 0.) 
&& (state_frequency[i] > 1)) { +/* if (sqrtl(variance[i]) < mean[i] * GAMMA_VARIATION_COEFF_THRESHOLD) { + variance[i] = mean[i] * mean[i] * GAMMA_VARIATION_COEFF_THRESHOLD * GAMMA_VARIATION_COEFF_THRESHOLD; + } + process->observation[i]->shape = mean[i] * mean[i] / variance[i]; + process->observation[i]->scale = variance[i] / mean[i]; */ + + // Hawang & Huang (2012), Ann. Inst. Statist. Math. 54(4), 840-847 + + buff = mean[i] * mean[i] / variance[i]; + if (buff > GAMMA_INVERSE_SAMPLE_SIZE_FACTOR / (double)state_frequency[i]) { + process->observation[i]->shape = buff - 1. / (double)state_frequency[i]; + } + else { + process->observation[i]->shape = buff; + } +/* if (process->observation[i]->shape < GAMMA_MIN_SHAPE_PARAMETER) { + process->observation[i]->shape = GAMMA_MIN_SHAPE_PARAMETER; + } */ + process->observation[i]->scale = mean[i] / process->observation[i]->shape; + + if ((process->observation[i]->shape >= GAMMA_SHAPE_PARAMETER_THRESHOLD) && + (state_frequency[i] < GAMMA_FREQUENCY_THRESHOLD)) { + log_geometric_mean = 0.; + + switch (type[variable]) { + + case INT_VALUE : { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + if (int_sequence[j][variable][k] > 0) { + log_geometric_mean += state_sequence_count[j][k][i] * log(int_sequence[j][variable][k]); + } + } + } + break; + } + + case REAL_VALUE : { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + if (real_sequence[j][variable][k] > 0.) 
{ + log_geometric_mean += state_sequence_count[j][k][i] * log(real_sequence[j][variable][k]); + } + } + } + break; + } + } + + log_geometric_mean /= state_frequency[i]; +/* j = 0; to be reworked + +# ifdef DEBUG + cout << "\n" << STAT_word[STATW_STATE] << " " << i << " " + << STAT_word[STATW_SHAPE] << " : " << process->observation[i]->shape << " " + << STAT_word[STATW_SCALE] << " : " << process->observation[i]->scale << endl; +# endif + + do { + process->observation[i]->scale = exp(log_geometric_mean - digamma(process->observation[i]->shape)); + process->observation[i]->shape = mean[i] / process->observation[i]->scale; + j++; + +# ifdef DEBUG + cout << STAT_word[STATW_SHAPE] << " : " << process->observation[i]->shape << " " + << STAT_word[STATW_SCALE] << " : " << process->observation[i]->scale << endl; +# endif + + } + while (j < MIN(GAMMA_ITERATION_FACTOR * iter , GAMMA_MAX_NB_ITERATION)); */ + + // approximations Johnson, Kotz & Balakrishnan, Continuous Univariate Distributions, vol. 1, 2nd ed., pp. 361-362 + +// process->observation[i]->shape = mean[i] / (2 * (mean[i] - exp(log_geometric_mean))) - 1./12.; + diff = log(mean[i]) - log_geometric_mean; + process->observation[i]->shape = (1 + sqrt(1 + 4 * diff / 3)) / (4 * diff); + process->observation[i]->scale = mean[i] / process->observation[i]->shape; + } + } + + else { + process->observation[i]->shape = GAMMA_MIN_SHAPE_PARAMETER; + process->observation[i]->scale = GAMMA_DEFAULT_SCALE_PARAMETER; + } + } + } + + else { + process->observation[i]->zero_probability = D_DEFAULT; + } + } + + delete [] state_frequency; + delete [] zero_mass; + delete [] mean; + delete [] variance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of inverse Gaussian observation distributions. + * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object. 
+ */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::inverse_gaussian_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process) const + +{ + int i , j , k; + double *mean , *inverse_scale; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + mean = new double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + mean[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * int_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * real_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean[i] /= state_frequency[i]; + process->observation[i]->location = mean[i]; + } + else { + process->observation[i]->location = D_DEFAULT; + } + } + + inverse_scale = new double[process->nb_state]; + for (i = 0;i < process->nb_state;i++) { + inverse_scale[i] = 0.; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if ((mean[k] > 0.) && (int_sequence[i][variable][j] > 0.)) { + inverse_scale[k] += state_sequence_count[i][j][k] * (1. / (double)int_sequence[i][variable][j] - 1. / mean[k]); + } + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + if ((mean[k] > 0.) 
&& (real_sequence[i][variable][j] > 0.)) { + inverse_scale[k] += state_sequence_count[i][j][k] * (1. / real_sequence[i][variable][j] - 1. / mean[k]); + } + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (inverse_scale[i] > 0.) { + process->observation[i]->scale = state_frequency[i] / inverse_scale[i]; + } + else { + process->observation[i]->scale = D_DEFAULT; + } + } + + delete [] state_frequency; + delete [] mean; + delete [] inverse_scale; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of Gaussian observation distributions. + * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object. + */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::gaussian_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process) const + +{ + int i , j , k; + double diff , *mean; + long double *variance; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + mean = new double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + mean[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * int_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * real_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean[i] /= 
state_frequency[i]; + process->observation[i]->location = mean[i]; + } + else { + process->observation[i]->location = D_INF; + } + } + + if (process->tied_dispersion) { + for (i = 1;i < process->nb_state;i++) { + state_frequency[0] += state_frequency[i]; + } + + variance = new long double[1]; + variance[0] = 0.; + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - mean[k]; + variance[0] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - mean[k]; + variance[0] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + } + +// variance[0] /= state_frequency[0]; + variance[0] /= (state_frequency[0] - process->nb_state); + + process->observation[0]->dispersion = sqrtl(variance[0]); + for (i = 1;i < process->nb_state;i++) { + process->observation[i]->dispersion = process->observation[0]->dispersion; + } + } + + else { + variance = new long double[process->nb_state]; + for (i = 0;i < process->nb_state;i++) { + variance[i] = 0.; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { +// if (state_frequency[i] > 0) { +// variance[i] /= 
state_frequency[i]; + if (state_frequency[i] > 1) { + variance[i] /= (state_frequency[i] - 1); + process->observation[i]->dispersion = sqrtl(variance[i]); + if ((process->observation[i]->location != 0.) && + (process->observation[i]->dispersion / process->observation[i]->location < GAUSSIAN_MIN_VARIATION_COEFF)) { + process->observation[i]->dispersion = process->observation[i]->location * GAUSSIAN_MIN_VARIATION_COEFF; + } + } + } + } + + delete [] state_frequency; + delete [] mean; + delete [] variance; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of von Mises observation distributions. + * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object. + */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::von_mises_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process) const + +{ + int i , j , k; + double buff , global_mean_direction , concentration , **mean_direction; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + mean_direction = new double*[process->nb_state]; + for (i = 0;i < process->nb_state;i++) { + mean_direction[i] = new double[4]; + } + + for (i = 0;i < process->nb_state;i++) { + mean_direction[i][0] = 0.; + mean_direction[i][1] = 0.; + + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean_direction[k][0] += state_sequence_count[i][j][k] * cos(int_sequence[i][variable][j] * M_PI / 180); + mean_direction[k][1] += state_sequence_count[i][j][k] * sin(int_sequence[i][variable][j] * M_PI / 180); + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { 
+ for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + switch (process->unit) { + case DEGREE : + mean_direction[k][0] += state_sequence_count[i][j][k] * cos(real_sequence[i][variable][j] * M_PI / 180); + mean_direction[k][1] += state_sequence_count[i][j][k] * sin(real_sequence[i][variable][j] * M_PI / 180); + break; + case RADIAN : + mean_direction[k][0] += state_sequence_count[i][j][k] * cos(real_sequence[i][variable][j]); + mean_direction[k][1] += state_sequence_count[i][j][k] * sin(real_sequence[i][variable][j]); + break; + } + + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean_direction[i][0] /= state_frequency[i]; + mean_direction[i][1] /= state_frequency[i]; + + mean_direction[i][2] = sqrt(mean_direction[i][0] * mean_direction[i][0] + + mean_direction[i][1] * mean_direction[i][1]); + + if (mean_direction[i][2] > 0.) { + mean_direction[i][3] = atan(mean_direction[i][1] / mean_direction[i][0]); + + if (mean_direction[i][0] < 0.) { + mean_direction[i][3] += M_PI; + } + else if (mean_direction[i][1] < 0.) 
{ + mean_direction[i][3] += 2 * M_PI; + } + + if (process->unit == DEGREE) { + mean_direction[i][3] *= (180 / M_PI); + } + + process->observation[i]->location = mean_direction[i][3]; + } + + else { + process->observation[i]->location = D_INF; + } + } + + else { + process->observation[i]->location = D_INF; + } + } + + if (process->tied_dispersion) { + global_mean_direction = 0.; + buff = 0.; + + for (i = 0;i < process->nb_state;i++) { + global_mean_direction += state_frequency[i] * mean_direction[i][2]; + buff += state_frequency[i]; + } + concentration = von_mises_concentration_computation(global_mean_direction / buff); + + for (i = 0;i < process->nb_state;i++) { + process->observation[i]->dispersion = concentration; + } + } + + else { + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + process->observation[i]->dispersion = von_mises_concentration_computation(mean_direction[i][2]); + } + else { + process->observation[i]->dispersion = D_DEFAULT; + } + } + } + + for (i = 0;i < process->nb_state;i++) { + delete [] mean_direction[i]; + } + delete [] mean_direction; + + delete [] state_frequency; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of Gaussian linear trend observation models. + * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object. 
+ */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::linear_model_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process) const + +{ + int i , j , k; + double diff , threshold , *mean , *index_parameter_mean , *index_parameter_variance; + long double *variance , *covariance , *residual_square_sum; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + mean = new double[process->nb_state]; + index_parameter_mean = new double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + mean[i] = 0.; + index_parameter_mean[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * int_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * real_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + index_parameter_mean[k] += state_sequence_count[i][j][k] * j; + } + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + index_parameter_mean[k] += state_sequence_count[i][j][k] * index_parameter[i][j]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean[i] /= state_frequency[i]; + index_parameter_mean[i] /= state_frequency[i]; + } + } + + variance = new 
long double[process->nb_state]; + index_parameter_variance = new double[process->nb_state]; + covariance = new long double[process->nb_state]; + for (i = 0;i < process->nb_state;i++) { + variance[i] = 0.; + index_parameter_variance[i] = 0.; + covariance[i] = 0.; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - mean[k]; + variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + } + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = j - index_parameter_mean[k]; + index_parameter_variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = index_parameter[i][j] - index_parameter_mean[k]; + index_parameter_variance[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + } + + switch (type[variable]) { + + case INT_VALUE : { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + covariance[k] += state_sequence_count[i][j][k] * (int_sequence[i][variable][j] - mean[k]) * + (j - index_parameter_mean[k]); + } + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + covariance[k] += 
state_sequence_count[i][j][k] * (int_sequence[i][variable][j] - mean[k]) * + (index_parameter[i][j] - index_parameter_mean[k]); + } + } + } + break; + } + } + break; + } + + case REAL_VALUE : { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + covariance[k] += state_sequence_count[i][j][k] * (real_sequence[i][variable][j] - mean[k]) * + (j - index_parameter_mean[k]); + } + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + covariance[k] += state_sequence_count[i][j][k] * (real_sequence[i][variable][j] - mean[k]) * + (index_parameter[i][j] - index_parameter_mean[k]); + } + } + } + break; + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + process->observation[i]->slope = covariance[i] / index_parameter_variance[i]; + process->observation[i]->intercept = mean[i] - process->observation[i]->slope * index_parameter_mean[i]; + if (variance[i] > 0.) 
{ + process->observation[i]->correlation = covariance[i] / sqrtl(variance[i] * index_parameter_variance[i]); + } + else { + process->observation[i]->correlation = 0.; + } + } + + else { + process->observation[i]->slope = D_INF; + process->observation[i]->intercept = D_INF; + } + } + + residual_square_sum = new long double[process->nb_state]; + for (i = 0;i < process->nb_state;i++) { + residual_square_sum[i] = 0.; + } + + switch (type[variable]) { + + case INT_VALUE : { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - (process->observation[k]->intercept + + process->observation[k]->slope * j); + residual_square_sum[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - (process->observation[k]->intercept + + process->observation[k]->slope * index_parameter[i][j]); + residual_square_sum[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + } + break; + } + + case REAL_VALUE : { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - (process->observation[k]->intercept + + process->observation[k]->slope * j); + residual_square_sum[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - (process->observation[k]->intercept + + process->observation[k]->slope * index_parameter[i][j]); + residual_square_sum[k] += state_sequence_count[i][j][k] * diff * diff; + } + } + 
} + break; + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 2) { + residual_square_sum[i] /= (state_frequency[i] - 2); + process->observation[i]->slope_standard_deviation = sqrtl(residual_square_sum[i] / index_parameter_variance[i]); + process->observation[i]->sample_size = state_frequency[i] - 2; + } + else { + process->observation[i]->slope_standard_deviation = 0.; + process->observation[i]->sample_size = 0.; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 2) { + process->observation[i]->dispersion = sqrtl(residual_square_sum[i]); + + // Commented by JB: cannot find the reason for code below. + // Moreover if mean[i] < 0 dispersion would be < 0 + /* if (mean[i] != 0.) { + if (process->observation[i]->dispersion / mean[i] < GAUSSIAN_MIN_VARIATION_COEFF) { + process->observation[i]->dispersion = mean[i] * GAUSSIAN_MIN_VARIATION_COEFF; + } + }*/ + + // else { + threshold = sqrt(variance_computation(variable , mean_computation(variable))); + +# ifdef DEBUG + cout << "\nTHRESHOLD: " << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " " + << STAT_word[STATW_STATE] << " " << i << " " << threshold << endl; +# endif + + threshold *= RESIDUAL_STANDARD_DEVIATION_COEFF; + if (process->observation[i]->dispersion < threshold) { + process->observation[i]->dispersion = threshold; + } + // } + } + +# ifdef DEBUG + cout << "\n" << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " " + << STAT_word[STATW_STATE] << " " << i << " " + << STAT_word[STATW_INTERCEPT] << " : " << process->observation[i]->intercept << " " + << STAT_word[STATW_SLOPE] << " : " << process->observation[i]->slope << " " + << STAT_word[STATW_STANDARD_DEVIATION] << " : " << process->observation[i]->dispersion << endl; +# endif + + } + + delete [] state_frequency; + delete [] mean; + delete [] index_parameter_mean; + delete [] variance; + delete [] index_parameter_variance; + delete [] covariance; + delete [] 
residual_square_sum; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a 1st-order autoregressive observation process. + * + * \param[in] state_sequence_count state counts, + * \param[in] variable variable index, + * \param[in] process pointer on a ContinuousParametricProcess object. + */ +/*--------------------------------------------------------------*/ + +template +void MarkovianSequences::autoregressive_model_estimation(Type ***state_sequence_count , int variable , + ContinuousParametricProcess *process) const + +{ + int i , j , k; + double diff , shifted_diff , residual_square_sum , *mean; + long double *square_sum , *shifted_square_sum , *autocovariance; + Type *state_frequency; + + + state_frequency = new Type[process->nb_state]; + mean = new double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + mean[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * int_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + mean[k] += state_sequence_count[i][j][k] * real_sequence[i][variable][j]; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_frequency[i] > 0) { + mean[i] /= state_frequency[i]; + process->observation[i]->location = mean[i]; + } + else { + process->observation[i]->location = D_INF; + } + } + + square_sum = new long double[process->nb_state]; + shifted_square_sum = new long double[process->nb_state]; + autocovariance = new long double[process->nb_state]; + for (i = 0;i < process->nb_state;i++) { + square_sum[i] = 
0.; + shifted_square_sum[i] = 0.; + autocovariance[i] = 0.; + state_frequency[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = int_sequence[i][variable][j] - mean[k]; + shifted_diff = int_sequence[i][variable][j - 1] - mean[k]; + square_sum[k] += state_sequence_count[i][j][k] * diff * diff; + shifted_square_sum[k] += state_sequence_count[i][j][k] * shifted_diff * shifted_diff; + autocovariance[k] += state_sequence_count[i][j][k] * diff * shifted_diff; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < length[i];j++) { + for (k = 0;k < process->nb_state;k++) { + diff = real_sequence[i][variable][j] - mean[k]; + shifted_diff = real_sequence[i][variable][j - 1] - mean[k]; + square_sum[k] += state_sequence_count[i][j][k] * diff * diff; + shifted_square_sum[k] += state_sequence_count[i][j][k] * shifted_diff * shifted_diff; + autocovariance[k] += state_sequence_count[i][j][k] * diff * shifted_diff; + state_frequency[k] += state_sequence_count[i][j][k]; + } + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if ((shifted_square_sum[i] > 0.) && (state_frequency[i] > 2)) { + process->observation[i]->autoregressive_coeff = autocovariance[i] / shifted_square_sum[i]; + if (process->observation[i]->autoregressive_coeff < -1.) { + process->observation[i]->autoregressive_coeff = -1.; + } + else if (process->observation[i]->autoregressive_coeff > 1.) { + process->observation[i]->autoregressive_coeff = 1.; + } + + residual_square_sum = (square_sum[i] - autocovariance[i] * autocovariance[i] / + shifted_square_sum[i]) / (state_frequency[i] - 2); + process->observation[i]->dispersion = sqrt(residual_square_sum); + + process->observation[i]->determination_coeff = 1.; + if (square_sum[i] > 0.) 
{ + process->observation[i]->determination_coeff -= residual_square_sum / square_sum[i]; + } + + if ((process->observation[i]->location != 0.) && + (process->observation[i]->dispersion / process->observation[i]->location < GAUSSIAN_MIN_VARIATION_COEFF)) { + process->observation[i]->dispersion = process->observation[i]->location * GAUSSIAN_MIN_VARIATION_COEFF; + } + process->observation[i]->sample_size = state_frequency[i] - 2; + } + + else { + process->observation[i]->autoregressive_coeff = 0.; + process->observation[i]->dispersion = 0.; + process->observation[i]->determination_coeff = D_DEFAULT; + process->observation[i]->sample_size = 0.; + } + } + + delete [] state_frequency; + delete [] mean; + delete [] square_sum; + delete [] shifted_square_sum; + delete [] autocovariance; +} + + +}; // namespace sequence_analysis + + + +#endif diff --git a/src/cpp/sequence_analysis/correlation.cpp b/src/cpp/sequence_analysis/correlation.cpp new file mode 100644 index 0000000..0f5ef11 --- /dev/null +++ b/src/cpp/sequence_analysis/correlation.cpp @@ -0,0 +1,2159 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the Correlation class. + */ +/*--------------------------------------------------------------*/ + +Correlation::Correlation() + +{ + type = PEARSON; + + variable_type = NULL; + + variable1 = NULL; + variable2 = NULL; + + function_type = VOID; + theoretical_function = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Correlation class. + * + * \param[in] itype correlation coefficient type (PEARSON/SPEARMAN/KENDALL), + * \param[in] max_lag maximum lag, + * \param[in] ivariable1 1st variable index, + * \param[in] ivariable2 2nd variable index. 
+ */ +/*--------------------------------------------------------------*/ + +Correlation::Correlation(correlation_type itype , int max_lag , int ivariable1 , int ivariable2) +:Curves(1 , max_lag + 1 , true , false) + +{ + type = itype; + + variable_type = new correlation_variable_type[1]; + variable_type[0] = OBSERVED_VALUE; + + variable1 = new int[1]; + variable2 = new int[1]; + variable1[0] = ivariable1; + variable2[0] = ivariable2; + + function_type = VOID; + theoretical_function = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Correlation class by merging. + * + * \param[in] inb_curve number of correlation functions, + * \param[in] ilength maximum lag, + * \param[in] frequency_flag flag on the frequencies, + * \param[in] itype correlation coefficient type (PEARSON/SPEARMAN/KENDALL). + */ +/*--------------------------------------------------------------*/ + +Correlation::Correlation(int inb_curve , int ilength , bool frequency_flag , correlation_type itype) +:Curves(inb_curve , ilength , frequency_flag , false) + +{ + type = itype; + + variable_type = new correlation_variable_type[nb_curve]; + + variable1 = new int[nb_curve]; + variable2 = new int[nb_curve]; + + function_type = VOID; + theoretical_function = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Correlation object. + * + * \param[in] correl reference on a Correlation object. 
+ */ +/*--------------------------------------------------------------*/ + +void Correlation::copy(const Correlation &correl) + +{ + int i; + + + type = correl.type; + + variable_type = new correlation_variable_type[nb_curve]; + for (i = 0;i < nb_curve;i++) { + variable_type[i] = correl.variable_type[i]; + } + + variable1 = new int[nb_curve]; + variable2 = new int[nb_curve]; + for (i = 0;i < nb_curve;i++) { + variable1[i] = correl.variable1[i]; + variable2[i] = correl.variable2[i]; + } + + function_type = correl.function_type; + + if (correl.theoretical_function) { + theoretical_function = new double[length]; + for (i = 0;i < length;i++) { + theoretical_function[i] = correl.theoretical_function[i]; + } + } + else { + theoretical_function = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of the data members of a Correlation object. + */ +/*--------------------------------------------------------------*/ + +void Correlation::remove() + +{ + delete [] variable_type; + + delete [] variable1; + delete [] variable2; + + delete [] theoretical_function; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the Correlation class. + */ +/*--------------------------------------------------------------*/ + +Correlation::~Correlation() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the Correlation class. + * + * \param[in] correl reference on a Correlation object. + * + * \return Correlation object. + */ +/*--------------------------------------------------------------*/ + +Correlation& Correlation::operator=(const Correlation &correl) + +{ + if (&correl != this) { + remove(); + Curves::remove(); + + Curves::copy(correl); + copy(correl); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Merging of Correlation objects. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] nb_correl number of Correlation objects, + * \param[in] icorrel pointer on the Correlation objects. + * + * \return Correlation object. + */ +/*--------------------------------------------------------------*/ + +Correlation* Correlation::merge(StatError &error , int nb_correl , + const Correlation **icorrel) const + +{ + bool status = true; + int i , j , k , m; + int inb_curve , *pfrequency; + Correlation *correl; + const Correlation **pcorrel; + + + correl = NULL; + error.init(); + + pfrequency = frequency; + + for (i = 0;i < nb_correl;i++) { + if ((icorrel[i]->type != type) || (icorrel[i]->offset != offset)) { + status = false; + ostringstream error_message , correction_message; + error_message << SEQ_label[SEQL_CORRELATION_FUNCTION] << " " << i + 2 << ": " + << SEQ_error[SEQR_CORRELATION_COEFF_TYPE]; + + switch (type) { + case SPEARMAN : + correction_message << SEQ_label[SEQL_SPEARMAN] << " "; + break; + case KENDALL : + correction_message << SEQ_label[SEQL_KENDALL] << " "; + break; + } + + if (offset == 1) { + correction_message << SEQ_label[SEQL_PARTIAL] << " "; + } + + if ((type == SPEARMAN) || (type == KENDALL)) { + correction_message << SEQ_label[SEQL_RANK] << " "; + } + + correction_message << SEQ_label[SEQL_CORRELATION_FUNCTION]; + + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + if (icorrel[i]->length != length) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_CORRELATION_FUNCTION] << " " << i + 2 << ": " + << SEQ_error[SEQR_MAX_LAG]; + error.correction_update((error_message.str()).c_str() , length - 1); + } + + else if (icorrel[i]->frequency) { + if (!pfrequency) { + pfrequency = icorrel[i]->frequency; + } + + else { + for (j = offset;j < length;j++) { + if (icorrel[i]->frequency[j] != pfrequency[j]) { + status = false; + ostringstream error_message; + error_message << 
SEQ_label[SEQL_CORRELATION_FUNCTION] << " " << i + 2 << ": " + << SEQ_label[SEQL_LAG] << " " << j << ": " << SEQ_error[SEQR_FREQUENCY]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + + if (status) { + nb_correl++; + pcorrel = new const Correlation*[nb_correl]; + + pcorrel[0] = this; + for (i = 1;i < nb_correl;i++) { + pcorrel[i] = icorrel[i - 1]; + } + + inb_curve = 0; + for (i = 0;i < nb_correl;i++) { + inb_curve += pcorrel[i]->nb_curve; + } + + correl = new Correlation(inb_curve , length , (pfrequency ? true : false) , type); + + correl->offset = offset; + + i = 0; + for (j = 0;j < nb_correl;j++) { + for (k = 0;k < pcorrel[j]->nb_curve;k++) { + correl->variable_type[i] = pcorrel[j]->variable_type[k]; + correl->variable1[i] = pcorrel[j]->variable1[k]; + correl->variable2[i++] = pcorrel[j]->variable2[k]; + } + } + + if (pfrequency) { + for (i = 0;i < length;i++) { + correl->frequency[i] = pfrequency[i]; + } + } + + i = 0; + for (j = 0;j < nb_correl;j++) { + for (k = 0;k < pcorrel[j]->nb_curve;k++) { + for (m = 0;m < length;m++) { + correl->point[i][m] = pcorrel[j]->point[k][m]; + } + i++; + } + } + + delete [] pcorrel; + } + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a Correlation object. + * + * \param[in,out] os stream. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Correlation::line_write(ostream &os) const + +{ + int i; + int autocorrelation , cross_correlation; + + + switch (type) { + case SPEARMAN : + os << SEQ_label[SEQL_SPEARMAN] << " "; + break; + case KENDALL : + os << SEQ_label[SEQL_KENDALL] << " "; + break; + } + + if (offset == 1) { + os << SEQ_label[SEQL_PARTIAL] << " "; + } + + if ((type == SPEARMAN) || (type == KENDALL)) { + os << SEQ_label[SEQL_RANK] << " "; + } + + autocorrelation = true; + cross_correlation = true; + for (i = 0;i < nb_curve;i++) { + if (variable_type[i] == OBSERVED_VALUE) { + if (variable1[i] != variable2[i]) { + autocorrelation = false; + } + else if (variable1[i] == variable2[i]) { + cross_correlation = false; + } + } + + else { + cross_correlation = false; + } + } + + if (autocorrelation) { + os << SEQ_label[SEQL_AUTO]; + } + else if (cross_correlation) { + os << SEQ_label[SEQL_CROSS]; + } + + os << SEQ_label[SEQL_CORRELATION_FUNCTION] << " " << SEQ_label[SEQL_MAX_LAG] << ": " << length - 1; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Correlation object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Correlation::ascii_write(ostream &os , bool exhaustive) const + +{ + bool autocorrelation , cross_correlation; + int i , j; + int *width; + double standard_normal_value , *confidence_limit; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + // computation of the confidence limits + + if (frequency) { + normal dist; + standard_normal_value = quantile(complement(dist , 0.025)); + + confidence_limit = new double[length]; + + for (i = 0;i < length;i++) { + switch (type) { + case PEARSON : + confidence_limit[i] = standard_normal_value / sqrt((double)frequency[i]); + break; + case SPEARMAN : + confidence_limit[i] = standard_normal_value / sqrt((double)frequency[i]); + break; + case KENDALL : + confidence_limit[i] = standard_normal_value * sqrt((2 * (2 * (double)frequency[i] + 5)) / + (9 * (double)frequency[i] * (double)(frequency[i] - 1))); + break; + } + } + } + + os << "\n"; + switch (type) { + case SPEARMAN : + os << SEQ_label[SEQL_SPEARMAN] << " "; + break; + case KENDALL : + os << SEQ_label[SEQL_KENDALL] << " "; + break; + } + + if (offset == 1) { + os << SEQ_label[SEQL_PARTIAL] << " "; + } + + if ((type == SPEARMAN) || (type == KENDALL)) { + os << SEQ_label[SEQL_RANK] << " "; + } + + autocorrelation = true; + cross_correlation = true; + for (i = 0;i < nb_curve;i++) { + if (variable_type[i] == OBSERVED_VALUE) { + if (variable1[i] != variable2[i]) { + autocorrelation = false; + } + else if (variable1[i] == variable2[i]) { + cross_correlation = false; + } + } + + else { + cross_correlation = false; + } + } + + if (autocorrelation) { + os << SEQ_label[SEQL_AUTO]; + } + else if (cross_correlation) { + os << SEQ_label[SEQL_CROSS]; + } + os << SEQ_label[SEQL_CORRELATION_FUNCTION]; + + // computation of the column widths + + width = new int[nb_curve + 4]; + + width[0] = column_width(length - 1); + for (i = 0;i < nb_curve;i++) { + width[i + 
1] = column_width(length - offset , point[i] + offset) + ASCII_SPACE; + } + + i = nb_curve + 1; + if (theoretical_function) { + width[i++] = column_width(length , theoretical_function) + ASCII_SPACE; + } + if (frequency) { + width[i++] = column_width(length - offset , confidence_limit + offset) + ASCII_SPACE; + width[i++] = column_width(frequency[offset]) + ASCII_SPACE; + } + + os << "\n "; + for (i = 0;i < nb_curve;i++) { + if (variable_type[i] == OBSERVED_VALUE) { + os << " | " << STAT_label[STATL_VARIABLE] << " " << variable1[i]; + if (variable1[i] != variable2[i]) { + os << " " << STAT_label[STATL_VARIABLE] << " " << variable2[i]; + } + } + + else { + switch (variable_type[i]) { + case OBSERVED_STATE : + os << " | " << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_STATE]; + break; + case THEORETICAL_STATE : + os << " | " << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_STATE]; + break; + case OBSERVED_OUTPUT : + os << " | " << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_OUTPUT]; + break; + case THEORETICAL_OUTPUT : + os << " | " << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_OUTPUT]; + break; + } + + os << " " << variable1[i]; + } + } + + if (theoretical_function) { + switch (function_type) { + case AUTOREGRESSIVE : + os << " | " << SEQ_label[SEQL_AUTOREGRESSIVE_MODEL]; + break; + case WHITE_NOISE : + os << " | " << SEQ_label[SEQL_WHITE_NOISE]; + break; + } + } + + if (frequency) { + os << " | " << SEQ_label[SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT] + << " | " << STAT_label[STATL_FREQUENCY]; + } + os << endl; + + for (i = offset;i < length;i++) { + os << setw(width[0]) << i; + for (j = 0;j < nb_curve;j++) { + os << setw(width[j + 1]) << point[j][i]; + } + + j = nb_curve + 1; + if (theoretical_function) { + os << setw(width[j++]) << theoretical_function[i]; + } + if (frequency) { + os << setw(width[j++]) << confidence_limit[i]; + os << setw(width[j++]) << frequency[i]; + } + os << endl; + } + + if (frequency) { + delete [] 
confidence_limit; + } + delete [] width; + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Correlation object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Correlation::ascii_write(StatError &error , const string path , bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + ascii_write(out_file , exhaustive); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Correlation object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Correlation::spreadsheet_write(StatError &error , const string path) const + +{ + bool status , autocorrelation , cross_correlation; + int i , j; + double standard_normal_value , confidence_limit; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + switch (type) { + case SPEARMAN : + out_file << SEQ_label[SEQL_SPEARMAN] << " "; + break; + case KENDALL : + out_file << SEQ_label[SEQL_KENDALL] << " "; + break; + } + + if (offset == 1) { + out_file << SEQ_label[SEQL_PARTIAL] << " "; + } + + if ((type == SPEARMAN) || (type == KENDALL)) { + out_file << SEQ_label[SEQL_RANK] << " "; + } + + autocorrelation = true; + cross_correlation = true; + for (i = 0;i < nb_curve;i++) { + if (variable_type[i] == OBSERVED_VALUE) { + if (variable1[i] != variable2[i]) { + autocorrelation = false; + } + else if (variable1[i] == variable2[i]) { + cross_correlation = false; + } + } + + else { + cross_correlation = false; + } + } + + if (autocorrelation) { + out_file << SEQ_label[SEQL_AUTO]; + } + else if (cross_correlation) { + out_file << SEQ_label[SEQL_CROSS]; + } + out_file << SEQ_label[SEQL_CORRELATION_FUNCTION]; + + out_file << "\n"; + for (i = 0;i < nb_curve;i++) { + if (variable_type[i] == OBSERVED_VALUE) { + out_file << "\t" << STAT_label[STATL_VARIABLE] << " " << variable1[i]; + if (variable1[i] != variable2[i]) { + out_file << " " << STAT_label[STATL_VARIABLE] << " " << variable2[i]; + } + } + + else { + switch (variable_type[i]) { + case OBSERVED_STATE : + out_file << "\t" << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_STATE]; + break; + case THEORETICAL_STATE : + out_file << "\t" << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_STATE]; + break; + case OBSERVED_OUTPUT : + out_file << "\t" << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_OUTPUT]; + break; + 
case THEORETICAL_OUTPUT : + out_file << "\t" << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_OUTPUT]; + break; + } + + out_file << " " << variable1[i]; + } + } + + if (theoretical_function) { + switch (function_type) { + case AUTOREGRESSIVE : + out_file << "\t" << SEQ_label[SEQL_AUTOREGRESSIVE_MODEL]; + break; + case WHITE_NOISE : + out_file << "\t" << SEQ_label[SEQL_WHITE_NOISE]; + break; + } + } + + if (frequency) { + out_file << "\t" << SEQ_label[SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT] + << "\t" << SEQ_label[SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT] + << "\t" << STAT_label[STATL_FREQUENCY]; + } + out_file << endl; + + if (frequency) { + normal dist; + standard_normal_value = quantile(complement(dist , 0.025)); + } + + for (i = offset;i < length;i++) { + out_file << i; + for (j = 0;j < nb_curve;j++) { + out_file << "\t" << point[j][i]; + } + + if (theoretical_function) { + out_file << "\t" << theoretical_function[i]; + } + + if (frequency) { + switch (type) { + case PEARSON : + confidence_limit = standard_normal_value / sqrt((double)frequency[i]); + break; + case SPEARMAN : + confidence_limit = standard_normal_value / sqrt((double)frequency[i]); + break; + case KENDALL : + confidence_limit = standard_normal_value * sqrt((2 * (2 * (double)frequency[i] + 5)) / + (9 * (double)frequency[i] * (double)(frequency[i] - 1))); + break; + } + + out_file << "\t" << confidence_limit << "\t" << -confidence_limit + << "\t" << frequency[i]; + } + + out_file << endl; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Correlation object and the associated confidence limits + * at the Gnuplot format. + * + * \param[in] path file path, + * \param[in] confidence_limit confidence limits. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Correlation::plot_print(const char *path , double *confidence_limit) const + +{ + bool status = false; + int i , j; + ofstream out_file(path); + + + if (out_file) { + status = true; + + for (i = 0;i < length;i++) { + for (j = 0;j < nb_curve;j++) { + out_file << point[j][i] << " "; + } + if (theoretical_function) { + out_file << theoretical_function[i] << " "; + } + if (frequency) { + out_file << confidence_limit[i] << " " << -confidence_limit[i] << " " + << frequency[i]; + } + out_file << endl; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a Correlation object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Correlation::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status , autocorrelation , cross_correlation; + int i , j; + double standard_normal_value , *confidence_limit = NULL; + ostringstream data_file_name; + + + error.init(); + + // computation of the confidence limits + + if (frequency) { + normal dist; + standard_normal_value = quantile(complement(dist , 0.025)); + + confidence_limit = new double[length]; + + for (i = 0;i < length;i++) { + switch (type) { + case PEARSON : + confidence_limit[i] = standard_normal_value / sqrt((double)frequency[i]); + break; + case SPEARMAN : + confidence_limit[i] = standard_normal_value / sqrt((double)frequency[i]); + break; + case KENDALL : + confidence_limit[i] = standard_normal_value * sqrt((2 * (2 * (double)frequency[i] + 5)) / + (9 * (double)frequency[i] * (double)(frequency[i] - 1))); + break; + } + } + } + + // writing of the data file + + data_file_name << prefix << ".dat"; + status = 
plot_print((data_file_name.str()).c_str() , confidence_limit); + + if (frequency) { + delete [] confidence_limit; + } + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + else { + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title" << " \""; + if (title) { + out_file << title << " - "; + } + + switch (type) { + case SPEARMAN : + out_file << SEQ_label[SEQL_SPEARMAN] << " "; + break; + case KENDALL : + out_file << SEQ_label[SEQL_KENDALL] << " "; + break; + } + + if (offset == 1) { + out_file << SEQ_label[SEQL_PARTIAL] << " "; + } + + if ((type == SPEARMAN) || (type == KENDALL)) { + out_file << SEQ_label[SEQL_RANK] << " "; + } + + autocorrelation = true; + cross_correlation = true; + for (j = 0;j < nb_curve;j++) { + if (variable_type[j] == OBSERVED_VALUE) { + if (variable1[j] != variable2[j]) { + autocorrelation = false; + } + else if (variable1[j] == variable2[j]) { + cross_correlation = false; + } + } + + else { + cross_correlation = false; + } + } + + if (autocorrelation) { + out_file << SEQ_label[SEQL_AUTO]; + } + else if (cross_correlation) { + out_file << SEQ_label[SEQL_CROSS]; + } + out_file << SEQ_label[SEQL_CORRELATION_FUNCTION] << "\"\n\n"; + + out_file << "set xlabel \"" << SEQ_label[SEQL_LAG] << "\"" << endl; + + if (length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [" << offset << ":" << length - 1 << "] [-1:1] "; + for (j = 0;j < nb_curve;j++) { + out_file << "\"" << 
label((data_file_name.str()).c_str()) << "\" using " << j + 1 << " title \""; + + if (variable_type[j] == OBSERVED_VALUE) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable1[j]; + if (variable1[j] != variable2[j]) { + out_file << " " << STAT_label[STATL_VARIABLE] << " " << variable2[j]; + } + } + + else { + switch (variable_type[j]) { + case OBSERVED_STATE : + out_file << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_STATE]; + break; + case THEORETICAL_STATE : + out_file << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_STATE]; + break; + case OBSERVED_OUTPUT : + out_file << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_OUTPUT]; + break; + case THEORETICAL_OUTPUT : + out_file << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_OUTPUT]; + break; + } + + out_file << " " << variable1[j]; + } + + out_file << "\" with linespoints"; + if (j < nb_curve - 1) { + out_file << ",\\" << endl; + } + } + + j = nb_curve + 1; + if (theoretical_function) { + out_file << ",\\\n\"" << label((data_file_name.str()).c_str()) << "\" using " << j++ << " title \""; + switch (function_type) { + case AUTOREGRESSIVE : + out_file << SEQ_label[SEQL_AUTOREGRESSIVE_MODEL]; + break; + case WHITE_NOISE : + out_file << SEQ_label[SEQL_WHITE_NOISE]; + break; + } + out_file << "\" with linespoints"; + } + if (frequency) { + out_file << ",\\\n\"" << label((data_file_name.str()).c_str()) << "\" using " << j++ + << " notitle with lines"; + out_file << ",\\\n\"" << label((data_file_name.str()).c_str()) << "\" using " << j++ + << " notitle with lines"; + } + out_file << endl; + + out_file << "set xlabel" << endl; + + if (length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (frequency) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set xlabel \"" << SEQ_label[SEQL_LAG] << "\"" << endl; + out_file << "set ylabel \"" << SEQ_label[SEQL_PAIR_FREQUENCY] << 
"\"" << endl; + + if (length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if (frequency[0] < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length - 1 << "] [0:" << frequency[0] << "] \"" + << label((data_file_name.str()).c_str()) << "\" using " << j++ + << " notitle with impulses" << endl; + + out_file << "set xlabel" << endl; + out_file << "set ylabel" << endl; + + if (length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if (frequency[0] < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a Correlation object. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* Correlation::get_plotable() const + +{ + bool autocorrelation , cross_correlation; + int i , j; + int nb_plot; + double standard_normal_value , *confidence_limit = NULL; + ostringstream title , legend; + MultiPlotSet *plot_set; + + + plot_set = new MultiPlotSet(frequency ? 
2 : 1); + MultiPlotSet &plot = *plot_set; + + title.str(""); + + switch (type) { + case SPEARMAN : + title << SEQ_label[SEQL_SPEARMAN] << " "; + break; + case KENDALL : + title << SEQ_label[SEQL_KENDALL] << " "; + break; + } + + if (offset == 1) { + title << SEQ_label[SEQL_PARTIAL] << " "; + } + + if ((type == SPEARMAN) || (type == KENDALL)) { + title << SEQ_label[SEQL_RANK] << " "; + } + + autocorrelation = true; + cross_correlation = true; + for (j = 0;j < nb_curve;j++) { + if (variable_type[j] == OBSERVED_VALUE) { + if (variable1[j] != variable2[j]) { + autocorrelation = false; + } + else if (variable1[j] == variable2[j]) { + cross_correlation = false; + } + } + + else { + cross_correlation = false; + } + } + + if (autocorrelation) { + title << SEQ_label[SEQL_AUTO]; + } + else if (cross_correlation) { + title << SEQ_label[SEQL_CROSS]; + } + title << SEQ_label[SEQL_CORRELATION_FUNCTION]; + + plot.title = title.str(); + + plot.border = "15 lw 0"; + + // correlation function + + plot[0].xrange = Range(0 , length - 1); + plot[0].yrange = Range(-1., 1.); + + plot[0].xlabel = SEQ_label[SEQL_LAG]; + + if (length - 1 < TIC_THRESHOLD) { + plot[0].xtics = 1; + } + + nb_plot = nb_curve; + if (theoretical_function) { + nb_plot++; + } + + // computation of the confidence limits + + if (frequency) { + normal dist; + standard_normal_value = quantile(complement(dist , 0.025)); + + confidence_limit = new double[length]; + + for (i = 0;i < length;i++) { + switch (type) { + case PEARSON : + confidence_limit[i] = standard_normal_value / sqrt((double)frequency[i]); + break; + case SPEARMAN : + confidence_limit[i] = standard_normal_value / sqrt((double)frequency[i]); + break; + case KENDALL : + confidence_limit[i] = standard_normal_value * sqrt((2 * (2 * (double)frequency[i] + 5)) / + (9 * (double)frequency[i] * (double)(frequency[i] - 1))); + break; + } + } + + nb_plot += 2; + } + + plot[0].resize(nb_plot); + + for (i = 0;i < nb_curve;i++) { + legend.str(""); + + if 
(variable_type[i] == OBSERVED_VALUE) { + legend << STAT_label[STATL_VARIABLE] << " " << variable1[i]; + if (variable1[i] != variable2[i]) { + legend << " " << STAT_label[STATL_VARIABLE] << " " << variable2[i]; + } + } + + else { + switch (variable_type[i]) { + case OBSERVED_STATE : + legend << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_STATE]; + break; + case THEORETICAL_STATE : + legend << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_STATE]; + break; + case OBSERVED_OUTPUT : + legend << SEQ_label[SEQL_OBSERVED] << " " << STAT_label[STATL_OUTPUT]; + break; + case THEORETICAL_OUTPUT : + legend << SEQ_label[SEQL_THEORETICAL] << " " << STAT_label[STATL_OUTPUT]; + break; + } + + legend << " " << variable1[i]; + } + + plot[0][i].legend = legend.str(); + + plot[0][i].style = "linespoints"; + + plotable_write(i , plot[0][i]); + } + + i = nb_curve; + if (theoretical_function) { + switch (function_type) { + case AUTOREGRESSIVE : + plot[0][i].legend = SEQ_label[SEQL_AUTOREGRESSIVE_MODEL]; + break; + case WHITE_NOISE : + plot[0][i].legend = SEQ_label[SEQL_WHITE_NOISE]; + break; + } + + plot[0][i].style = "linespoints"; + + for (j = 0;j < length;j++) { + plot[0][i].add_point(j , theoretical_function[j]); + } + i++; + } + + if (frequency) { + plot[0][i].style = "lines"; + + for (j = 0;j < length;j++) { + plot[0][i].add_point(j , confidence_limit[j]); + } + i++; + + plot[0][i].style = "lines"; + + for (j = 0;j < length;j++) { + plot[0][i].add_point(j , -confidence_limit[j]); + } + + // frequencies + + plot[1].xrange = Range(0 , length - 1); + plot[1].yrange = Range(0 , frequency[0]); + + plot[1].xlabel = SEQ_label[SEQL_LAG]; + plot[1].ylabel = SEQ_label[SEQL_PAIR_FREQUENCY]; + + if (length - 1 < TIC_THRESHOLD) { + plot[1].xtics = 1; + } + if (frequency[0] < TIC_THRESHOLD) { + plot[1].ytics = 1; + } + + plot[1].resize(1); + + plot[1][0].style = "impulses"; + + plotable_frequency_write(plot[1][0]); + + delete [] confidence_limit; + } + + return plot_set; +} + + 
+/*--------------------------------------------------------------*/ +/** + * \brief Computation of a correlation function on the basis of a Sequences object. + * + * \param[in] correl reference on a Correlation object, + * \param[in] variable1 1st variable index, + * \param[in] variable2 2nd variable index, + * \param[in] normalization normalization (APPROXIMATED/EXACT), + * \param[in] individual_mean flag mean computation by individual or globally. + */ +/*--------------------------------------------------------------*/ + +void Sequences::correlation_computation(Correlation &correl , int variable1 , int variable2 , + correlation_normalization normalization , bool individual_mean) const + +{ + if (correl.type == PEARSON) { + int i , j , k; + int max_lag = correl.length - 1; + double variance1 , variance2 , diff , norm , *mean1 , *mean2; + + + // computation of means and variances + +/* mean1 = mean_computation(variable1); + + if (variable1 == variable2) { + mean2 = mean1; + norm = variance_computation(variable1 , mean1); + } + else { + mean2 = mean_computation(variable2); + norm = sqrt(variance_computation(variable1 , mean1) * + variance_computation(variable2 , mean2)); + } + + norm *= (cumul_length - 1); */ + + mean1 = new double[nb_sequence]; + mean2 = new double[nb_sequence]; + + if (individual_mean) { + variance1 = 0.; + + if (type[variable1] != REAL_VALUE) { + for (i = 0;i < nb_sequence;i++) { + mean1[i] = 0.; + for (j = 0;j < length[i];j++) { + mean1[i] += int_sequence[i][variable1][j]; + } + mean1[i] /= length[i]; + + for (j = 0;j < length[i];j++) { + diff = int_sequence[i][variable1][j] - mean1[i]; + variance1 += diff * diff; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + mean1[i] = 0.; + for (j = 0;j < length[i];j++) { + mean1[i] += real_sequence[i][variable1][j]; + } + mean1[i] /= length[i]; + + for (j = 0;j < length[i];j++) { + diff = real_sequence[i][variable1][j] - mean1[i]; + variance1 += diff * diff; + } + } + } + + if (variable1 == 
variable2) { + for (i = 0;i < nb_sequence;i++) { + mean2[i] = mean1[i]; + } + norm = variance1; + } + + else { + variance2 = 0.; + + if (type[variable2] != REAL_VALUE) { + for (i = 0;i < nb_sequence;i++) { + mean2[i] = 0.; + for (j = 0;j < length[i];j++) { + mean2[i] += int_sequence[i][variable2][j]; + } + mean2[i] /= length[i]; + + for (j = 0;j < length[i];j++) { + diff = int_sequence[i][variable2][j] - mean2[i]; + variance2 += diff * diff; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + mean2[i] = 0.; + for (j = 0;j < length[i];j++) { + mean2[i] += real_sequence[i][variable2][j]; + } + mean2[i] /= length[i]; + + for (j = 0;j < length[i];j++) { + diff = real_sequence[i][variable2][j] - mean2[i]; + variance2 += diff * diff; + } + } + } + + norm = sqrt(variance1 * variance2); + } + } + + else { + mean1[0] = mean_computation(variable1); + for (i = 1;i < nb_sequence;i++) { + mean1[i] = mean1[0]; + } + + if (variable1 == variable2) { + for (i = 0;i < nb_sequence;i++) { + mean2[i] = mean1[i]; + } + norm = variance_computation(variable1 , mean1[0]); + } + + else { + mean2[0] = mean_computation(variable2); + for (i = 1;i < nb_sequence;i++) { + mean2[i] = mean2[0]; + } + norm = sqrt(variance_computation(variable1 , mean1[0]) * + variance_computation(variable2 , mean2[0])); + } + + norm *= (cumul_length - 1); + } + + // computation of the correlation coefficients + + for (i = 0;i <= max_lag;i++) { + correl.point[0][i] = 0.; + correl.frequency[i] = 0; + + for (j = 0;j < nb_sequence;j++) { + if (length[j] > i) { + if ((type[variable1] != REAL_VALUE) && (type[variable2] != REAL_VALUE)) { + for (k = i;k < length[j];k++) { + correl.point[0][i] += (int_sequence[j][variable1][k] - mean1[j]) * + (int_sequence[j][variable2][k - i] - mean2[j]); + } + } + else if ((type[variable1] != REAL_VALUE) && (type[variable2] == REAL_VALUE)) { + for (k = i;k < length[j];k++) { + correl.point[0][i] += (int_sequence[j][variable1][k] - mean1[j]) * + (real_sequence[j][variable2][k - i] 
- mean2[j]); + } + } + else if ((type[variable1] == REAL_VALUE) && (type[variable2] != REAL_VALUE)) { + for (k = i;k < length[j];k++) { + correl.point[0][i] += (real_sequence[j][variable1][k] - mean1[j]) * + (int_sequence[j][variable2][k - i] - mean2[j]); + } + } +// else if ((type[variable1] == REAL_VALUE) && (type[variable2] == REAL_VALUE)) { + else { + for (k = i;k < length[j];k++) { + correl.point[0][i] += (real_sequence[j][variable1][k] - mean1[j]) * + (real_sequence[j][variable2][k - i] - mean2[j]); + } + } + + correl.frequency[i] += length[j] - i; + } + } + + switch (normalization) { + case APPROXIMATED : + correl.point[0][i] /= norm; + break; + case EXACT : + correl.point[0][i] *= cumul_length / (correl.frequency[i] * norm); + break; + } + +// if (correl.frequency[i] <= CORRELATION_MIN_FREQUENCY) { + if (correl.frequency[i] <= cumul_length * CORRELATION_FREQUENCY_RATIO) { + correl.length = i + 1; + break; + } + } + + if (normalization == APPROXIMATED) { + for (i = 0;i < correl.length;i++) { + correl.frequency[i] = cumul_length; + } + } + + delete [] mean1; + delete [] mean2; + } + + else if (correl.type == SPEARMAN) { + int i , j , k; + int max_lag = correl.length - 1 , *pfrequency; + double main_term , correction , norm , rank_mean , *rank[2]; + + + // computation of the main term and the correction term for tied values + + main_term = cumul_length * ((double)cumul_length * (double)cumul_length - 1); + + pfrequency = marginal_distribution[variable1]->frequency + marginal_distribution[variable1]->offset; + correction = 0.; + for (i = marginal_distribution[variable1]->offset;i < marginal_distribution[variable1]->nb_value;i++) { + if (*pfrequency > 1) { + correction += *pfrequency * ((double)*pfrequency * (double)*pfrequency - 1); + } + pfrequency++; + } + + if (variable1 == variable2) { + norm = (main_term - correction) / 12.; + } + + else { + norm = sqrt(main_term - correction) / 12.; + + pfrequency = marginal_distribution[variable2]->frequency + 
marginal_distribution[variable2]->offset; + correction = 0.; + for (i = marginal_distribution[variable2]->offset;i < marginal_distribution[variable2]->nb_value;i++) { + if (*pfrequency > 1) { + correction += *pfrequency * ((double)*pfrequency * (double)*pfrequency - 1); + } + pfrequency++; + } + + norm *= sqrt(main_term - correction); + } + + // rank computation + + rank_mean = (double)(cumul_length + 1) / 2.; + + rank[0] = marginal_distribution[variable1]->rank_computation(); + + if (variable1 == variable2) { + rank[1] = new double[marginal_distribution[variable1]->nb_value]; + + for (i = marginal_distribution[variable1]->offset;i < marginal_distribution[variable1]->nb_value;i++) { + rank[1][i] = rank[0][i]; + } + } + + else { + rank[1] = marginal_distribution[variable2]->rank_computation(); + } + + // computation of the correlation coefficients + + for (i = 0;i <= max_lag;i++) { + correl.point[0][i] = 0.; + correl.frequency[i] = 0; + + // computation of the centered rank differences + + for (j = 0;j < nb_sequence;j++) { + if (length[j] > i) { + for (k = i;k < length[j];k++) { + correl.point[0][i] += (rank[0][int_sequence[j][variable1][k]] - rank_mean) * + (rank[1][int_sequence[j][variable2][k - i]] - rank_mean); + } + correl.frequency[i] += length[j] - i; + } + } + + switch (normalization) { + case APPROXIMATED : + correl.point[0][i] /= norm; + break; + case EXACT : + correl.point[0][i] *= cumul_length / (correl.frequency[i] * norm); + break; + } + +// if (correl.frequency[i] <= CORRELATION_MIN_FREQUENCY) { + if (correl.frequency[i] <= cumul_length * CORRELATION_FREQUENCY_RATIO) { + correl.length = i + 1; + break; + } + } + + if (normalization == APPROXIMATED) { + for (i = 0;i < correl.length;i++) { + correl.frequency[i] = cumul_length; + } + } + + for (i = 0;i < 2;i++) { + delete [] rank[i]; + } + } + + else { + int i , j , k; + int max_lag = correl.length - 1 , nb_vector , **int_vector; + Vectors *vec; + + + int_vector = new int*[cumul_length]; + for (i = 0;i < 
cumul_length;i++) { + int_vector[i] = new int[2]; + } + + for (i = 0;i <= max_lag;i++) { + + // constitution of the vector sample + + nb_vector = 0; + for (j = 0;j < nb_sequence;j++) { + if (length[j] > i) { + for (k = i;k < length[j];k++) { + int_vector[nb_vector][0] = int_sequence[j][variable1][k]; + int_vector[nb_vector][1] = int_sequence[j][variable2][k - i]; + nb_vector++; + } + } + } + correl.frequency[i] = nb_vector; + + vec = new Vectors(nb_vector , NULL , 2 , int_vector); + + // computation of the correlation coefficient + + switch (correl.type) { + case SPEARMAN2 : + correl.point[0][i] = vec->spearman_rank_single_correlation_computation(); + break; + case KENDALL : + correl.point[0][i] = vec->kendall_rank_single_correlation_computation(); + break; + } + + delete vec; + +// if (correl.frequency[i] <= CORRELATION_MIN_FREQUENCY) { + if (correl.frequency[i] <= cumul_length * CORRELATION_FREQUENCY_RATIO) { + correl.length = i + 1; + break; + } + } + + for (i = 0;i < cumul_length;i++) { + delete [] int_vector[i]; + } + delete [] int_vector; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of a correlation function on the basis of a Sequences object. + * + * \param[in] error reference on a StatError object, + * \param[in] variable1 1st variable index, + * \param[in] variable2 2nd variable index, + * \param[in] itype correlation coefficient type (PEARSON/SPEARMAN/KENDALL), + * \param[in] max_lag maximum lag, + * \param[in] normalization normalization (APPROXIMATED/EXACT), + * \param[in] individual_mean flag mean computation by individual or globally. + * + * \return Correlation object. 
+ */ +/*--------------------------------------------------------------*/ + +Correlation* Sequences::correlation_computation(StatError &error , int variable1 , int variable2 , + correlation_type itype , int max_lag , + correlation_normalization normalization , + bool individual_mean) const + +{ + bool status = true; + Correlation *correl; + + + correl = NULL; + error.init(); + + if ((variable1 < 1) || (variable1 > nb_variable)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable1 << ": " + << STAT_error[STATR_VARIABLE_INDEX]; + error.update((error_message.str()).c_str()); + } + + else { + variable1--; + + if ((itype == PEARSON) && (type[variable1] != INT_VALUE) && (type[variable1] != STATE) && + (type[variable1] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable1 + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + +// if (((itype == SPEARMAN) || (itype == KENDALL)) && (!marginal_distribution[variable1])) { + if (((itype == SPEARMAN) || (itype == SPEARMAN2) || (itype == KENDALL)) && + (!marginal_distribution[variable1])) { + status = false; + ostringstream error_message; + error_message << STAT_error[STATR_RANK_CORRELATION_COMPUTATION] << ": " + << STAT_label[STATL_VARIABLE] << " " << variable1 + 1 << " " + << STAT_error[STATR_SHIFTED_SCALED]; + error.update((error_message.str()).c_str()); + } + } + + if ((variable2 < 1) || (variable2 > nb_variable)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable2 << ": " + << STAT_error[STATR_VARIABLE_INDEX]; + error.update((error_message.str()).c_str()); + } + + 
else { + variable2--; + + if ((itype == PEARSON) && (type[variable2] != INT_VALUE) && (type[variable2] != STATE) && + (type[variable2] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable2 + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + +// if (((itype == SPEARMAN) || (itype == KENDALL)) && (!marginal_distribution[variable2])) { + if (((itype == SPEARMAN) || (itype == SPEARMAN2) || (itype == KENDALL)) && + (!marginal_distribution[variable2])) { + status = false; + ostringstream error_message; + error_message << STAT_error[STATR_RANK_CORRELATION_COMPUTATION] << ": " + << STAT_label[STATL_VARIABLE] << " " << variable2 + 1 << " " + << STAT_error[STATR_SHIFTED_SCALED]; + error.update((error_message.str()).c_str()); + } + } + + if ((max_lag < I_DEFAULT) || (max_lag >= max_length)) { + status = false; + error.update(SEQ_error[SEQR_MAX_LAG]); + } + + if (status) { + if (max_lag == I_DEFAULT) { + max_lag = max_length - 1; + } + + // construction of the correlation function + +// correl = new Correlation(itype , max_lag , variable1 + 1 , variable2 + 1); + correl = new Correlation((itype == SPEARMAN2 ? SPEARMAN : itype) , max_lag , variable1 + 1 , variable2 + 1); + correlation_computation(*correl , variable1 , variable2 , normalization , individual_mean); + } + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the theoretical autocorrelation function of a first-order + * autoregressive model. + * + * \param[in] error reference on a StatError object, + * \param[in] autoregressive_coef autoregressive coefficient. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Correlation::autoregressive_model_autocorrelation(StatError &error , double autoregressive_coeff) + +{ + bool status = true; + int i; + + + error.init(); + + if ((type != PEARSON) || (offset != 0)) { + status = false; + ostringstream correction_message; + correction_message << SEQ_label[SEQL_PEARSON] << " " + << SEQ_label[SEQL_CORRELATION_FUNCTION]; + error.correction_update(SEQ_error[SEQR_CORRELATION_COEFF_TYPE] , (correction_message.str()).c_str()); + } + + else { + for (i = 0;i < nb_curve;i++) { + if ((point[i][0] < 1. - DOUBLE_ERROR) || (point[i][0] > 1. + DOUBLE_ERROR)) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_CORRELATION_FUNCTION] << " " << i + 1 << " " + << SEQ_error[SEQR_INCOMPATIBLE_CORRELATION_FUNCTION]; + error.update((error_message.str()).c_str()); + } + } + } + + if ((autoregressive_coeff < -1.) || (autoregressive_coeff > 1.)) { + status = false; + error.update(SEQ_error[SEQR_AUTOREGRESSIVE_COEFF]); + } + + if (status) { + delete [] theoretical_function; + function_type = AUTOREGRESSIVE; + theoretical_function = new double[length]; + + theoretical_function[0] = 1.; + for (i = 1;i < length;i++) { + theoretical_function[i] = theoretical_function[i - 1] * autoregressive_coeff; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the theoretical correlation function of a white noise + * for a given filter. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_point filter width, + * \param[in] filter filter, + * \param[in] residual flag computation of the filter corresponding to the residuals. + * + * \return error status. 
+ */
+/*--------------------------------------------------------------*/
+
+bool Correlation::white_noise_correlation(StatError &error , int nb_point , double *filter ,
+                                          int residual)
+
+{
+  bool valid = true;
+
+
+  error.init();
+
+  // the function must be a Pearson correlation function with a null offset and
+  // all the curves must share the first value of curve 0 (within DOUBLE_ERROR)
+
+  if ((type == PEARSON) && (offset == 0)) {
+    for (int curve = 1;curve < nb_curve;curve++) {
+      const double origin = point[curve][0];
+
+      if ((origin < point[0][0] - DOUBLE_ERROR) || (origin > point[0][0] + DOUBLE_ERROR)) {
+        valid = false;
+        ostringstream error_message;
+        error_message << SEQ_label[SEQL_CORRELATION_FUNCTION] << " " << curve + 1 << " "
+                      << SEQ_error[SEQR_INCOMPATIBLE_CORRELATION_FUNCTION];
+        error.update((error_message.str()).c_str());
+      }
+    }
+  }
+
+  else {
+    valid = false;
+    ostringstream correction_message;
+    correction_message << SEQ_label[SEQL_PEARSON] << " "
+                       << SEQ_label[SEQL_CORRELATION_FUNCTION];
+    error.correction_update(SEQ_error[SEQR_CORRELATION_COEFF_TYPE] , (correction_message.str()).c_str());
+  }
+
+  if (!valid) {
+    return false;
+  }
+
+  // filter corresponding to the residuals: every coefficient is negated and
+  // 1 is added to the central one; the caller's array is modified in place
+
+  if (residual) {
+    for (int k = 0;k < nb_point;k++) {
+      filter[k] = -filter[k];
+    }
+    filter[nb_point / 2] += 1;
+  }
+
+  delete [] theoretical_function;
+  function_type = WHITE_NOISE;
+  theoretical_function = new double[length];
+
+  // normalization term: sum of the squared filter coefficients
+
+  double variance = 0.;
+  for (int k = 0;k < nb_point;k++) {
+    variance += filter[k] * filter[k];
+  }
+
+  // lagged products of the filter with itself, rescaled by the first point of curve 0
+
+  const int nb_lag = MIN(nb_point , length);
+
+  theoretical_function[0] = point[0][0];
+  for (int lag = 1;lag < nb_lag;lag++) {
+    double product = 0.;
+    for (int k = 0;k < nb_point - lag;k++) {
+      product += filter[lag + k] * filter[k];
+    }
+    theoretical_function[lag] = product * point[0][0] / variance;
+  }
+
+  // null correlation beyond the filter width
+
+  for (int lag = nb_point;lag < length;lag++) {
+    theoretical_function[lag] = 0.;
+  }
+
+  return true;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the theoretical correlation function of a white noise
+ *        for a given filter.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] dist  symmetric discrete distribution.
+ * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Correlation::white_noise_correlation(StatError &error , const Distribution &dist) + +{ + bool status = true; + + + error.init(); + + if ((dist.offset != 0) || ((dist.nb_value - dist.offset) % 2 == 0)) { + status = false; + error.correction_update(STAT_error[STATR_NB_VALUE] , STAT_error[STATR_ODD]); + } + if (fabs(dist.skewness_computation()) > SKEWNESS_ROUNDNESS) { + status = false; + error.update(STAT_error[STATR_NON_SYMMETRICAL_DISTRIBUTION]); + } + if (dist.complement > 0.) { + status = false; + error.update(STAT_error[STATR_UNPROPER_DISTRIBUTION]); + } + + if (status) { + status = white_noise_correlation(error , dist.nb_value , dist.mass); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the theoretical correlation function of a white noise + * for a differentiation. + * + * \param[in] error reference on a StatError object, + * \param[in] order differentiation order. + * + * \return error status. 
+ */
+/*--------------------------------------------------------------*/
+
+bool Correlation::white_noise_correlation(StatError &error , int order)
+
+{
+  error.init();
+
+  // only differencing orders between 1 and MAX_DIFFERENCING_ORDER are accepted
+
+  if ((order < 1) || (order > MAX_DIFFERENCING_ORDER)) {
+    error.update(SEQ_error[SEQR_DIFFERENCING_ORDER]);
+    return false;
+  }
+
+  // construction of the differencing filter by recurrence:
+  // coeff[0] = 1, coeff[k] = -coeff[k - 1] * (order - k + 1) / k
+
+  const int nb_coeff = order + 1;
+  double *coeff = new double[nb_coeff];
+
+  coeff[0] = 1.;
+  for (int k = 1;k < nb_coeff;k++) {
+    coeff[k] = -coeff[k - 1] * (order - k + 1) / k;
+  }
+
+#   ifdef DEBUG
+  cout << "\nfilter : ";
+  for (int k = 0;k < nb_coeff;k++) {
+    cout << coeff[k] << " ";
+  }
+  cout << endl;
+#   endif
+
+  // the filter is used as is (residual flag set to false)
+
+  const bool computed = white_noise_correlation(error , nb_coeff , coeff , false);
+
+  delete [] coeff;
+
+  return computed;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of a partial autocorrelation function on the basis of a Sequences object.
+ *
+ * \param[in] error    reference on a StatError object,
+ * \param[in] variable variable index,
+ * \param[in] itype    correlation coefficient type (PEARSON/KENDALL),
+ * \param[in] max_lag  maximum lag.
+ *
+ * \return Correlation object.
+ */ +/*--------------------------------------------------------------*/ + +Correlation* Sequences::partial_autocorrelation_computation(StatError &error , int variable , + correlation_type itype , int max_lag) const + +{ + bool status = true; + int i , j; + double sum , denom , *ppoint , *cpoint1 , *cpoint2 , *aux_correl , *paux_correl , + *aux , *paux1 , *paux2; + Correlation *correl , *partial_correl; + + + partial_correl = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((itype == PEARSON) && (type[variable] != INT_VALUE) && (type[variable] != STATE) && + (type[variable] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + if ((itype == KENDALL) && (!marginal_distribution[variable])) { + status = false; + ostringstream error_message; + error_message << STAT_error[STATR_RANK_CORRELATION_COMPUTATION] << ": " + << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " " + << STAT_error[STATR_SHIFTED_SCALED]; + error.update((error_message.str()).c_str()); + } + } + + if ((max_lag != I_DEFAULT) && ((max_lag < 1) || (max_lag >= max_length))) { + status = false; + error.update(SEQ_error[SEQR_MAX_LAG]); + } + + if (status) { + if (max_lag == I_DEFAULT) { + max_lag = max_length - 1; + } + + // construction of the autocorrelation function + + correl = correlation_computation(error , variable + 1 , variable + 1 , itype , + max_lag , APPROXIMATED); + max_lag = correl->length - 1; + + // construction of the partial autocorrelation function + + partial_correl 
= new Correlation(itype , max_lag , variable + 1 , variable + 1); + partial_correl->offset = 1; + + // computation of partial correlation coefficients + + paux_correl = new double[max_lag + 1]; + aux_correl = new double[max_lag]; + + ppoint = partial_correl->point[0]; + cpoint2 = correl->point[0] + 1; + + *ppoint++ = 0.; + partial_correl->frequency[0] = correl->frequency[0]; + + denom = 1.; + for (i = 1;i <= max_lag;i++) { + partial_correl->frequency[i] = correl->frequency[i]; + + cpoint1 = correl->point[0] + i; +// cpoint2 = correl->point[0] + 1; + paux1 = paux_correl + 1; + sum = 0.; +// double sum2 = 0.; + for (j = 1;j < i;j++) { +// sum2 += *paux1 * *cpoint2++; + sum += *paux1++ * *--cpoint1; + } + +// *ppoint = (*cpoint2 - sum) / (1. - sum2); + + *ppoint = (*cpoint2++ - sum) / denom; + denom *= (1. - *ppoint * *ppoint); + + aux = aux_correl + 1; + paux1 = paux_correl + 1; + paux2 = paux_correl + i; + for (j = 1;j < i;j++) { + *aux++ = *paux1++ - *ppoint * *--paux2; + } + + paux1 = paux_correl + 1; + aux = aux_correl + 1; + for (j = 1;j < i;j++) { + *paux1++ = *aux++; + } + + *paux1 = *ppoint++; + } + + delete correl; + delete [] paux_correl; + delete [] aux_correl; + } + + return partial_correl; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/hidden_semi_markov.cpp b/src/cpp/sequence_analysis/hidden_semi_markov.cpp new file mode 100644 index 0000000..53892b0 --- /dev/null +++ b/src/cpp/sequence_analysis/hidden_semi_markov.cpp @@ -0,0 +1,898 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program 
is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include + +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "hidden_semi_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the SemiMarkov class. + * + * \param[in] pchain pointer on a Chain object, + * \param[in] poccupancy pointer on a CategoricalSequenceProcess object, + * \param[in] inb_output_process number of observation processes, + * \param[in] pobservation pointer on CategoricalProcess objects, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions. 
+ */ +/*--------------------------------------------------------------*/ + +SemiMarkov::SemiMarkov(const Chain *pchain , const CategoricalSequenceProcess *poccupancy , + int inb_output_process , CategoricalProcess **pobservation , + int length , bool counting_flag) +:SemiMarkovChain(pchain , poccupancy) + +{ + int i; + + + nb_iterator = 0; + semi_markov_data = NULL; + + state_process = new CategoricalSequenceProcess(*poccupancy); + + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) { + state_process->absorption[i] = 0.; + } + else { + state_process->absorption[i] = 1.; + } + } + +# ifdef DEBUG + assert(sojourn_type != NULL); + assert(forward != NULL); +# endif + + /* sojourn_type = new state_sojourn_type[nb_state]; + forward = new Forward*[nb_state]; + + for (i = 0;i < nb_state;i++) { + sojourn_type[i] = (state_process->sojourn_time[i] ? SEMI_MARKOVIAN : MARKOVIAN); + + if ((sojourn_type[i] == SEMI_MARKOVIAN) && (stype[i] == RECURRENT)) { + forward[i] = new Forward(*(state_process->sojourn_time[i])); + } + else { + forward[i] = NULL; + } + }*/ + + if (type == EQUILIBRIUM) { + for (i = 0;i < nb_state;i++) { + initial[i] = 1. / (double)nb_state; + } + initial_probability_computation(); + } + + nb_output_process = inb_output_process; + + categorical_process = new CategoricalSequenceProcess*[nb_output_process]; + for (i = 0;i < nb_output_process;i++) { + categorical_process[i] = new CategoricalSequenceProcess(*pobservation[i]); + } + + if (length > COUNTING_MAX_LENGTH) { + counting_flag = false; + } + characteristic_computation(length , counting_flag); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the SemiMarkov class. 
+ * + * \param[in] pchain pointer on a Chain object, + * \param[in] poccupancy pointer on a CategoricalSequenceProcess object, + * \param[in] inb_output_process number of observation processes, + * \param[in] categorical_observation pointer on CategoricalProcess objects, + * \param[in] discrete_parametric_observation pointer on DiscreteParametricProcess objects, + * \param[in] continuous_parametric_observation pointer on ContinuousParametricProcess objects, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions. + */ +/*--------------------------------------------------------------*/ + +SemiMarkov::SemiMarkov(const Chain *pchain , const CategoricalSequenceProcess *poccupancy , + int inb_output_process , CategoricalProcess **categorical_observation , + DiscreteParametricProcess **discrete_parametric_observation , + ContinuousParametricProcess **continuous_parametric_observation , + int length , bool counting_flag) +:SemiMarkovChain(pchain , poccupancy) + +{ + int i; + + + nb_iterator = 0; + semi_markov_data = NULL; + + state_process = new CategoricalSequenceProcess(*poccupancy); + + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) { + state_process->absorption[i] = 0.; + } + else { + state_process->absorption[i] = 1.; + } + } + +# ifdef DEBUG + assert(sojourn_type != NULL); + assert(forward != NULL); +# endif + // forward = new Forward*[nb_state]; + +/* for (i = 0;i < nb_state;i++) { + sojourn_type[i] = (state_process->sojourn_time[i] ? SEMI_MARKOVIAN : MARKOVIAN); + + if ((sojourn_type[i] == SEMI_MARKOVIAN) && (stype[i] == RECURRENT)) { + forward[i] = new Forward(*(state_process->sojourn_time[i])); + } + else { + forward[i] = NULL; + } + }*/ + + if (type == EQUILIBRIUM) { + for (i = 0;i < nb_state;i++) { + initial[i] = 1. 
/ (double)nb_state; + } + initial_probability_computation(); + } + + nb_output_process = inb_output_process; + + categorical_process = new CategoricalSequenceProcess*[nb_output_process]; + discrete_parametric_process = new DiscreteParametricProcess*[nb_output_process]; + continuous_parametric_process = new ContinuousParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + if (categorical_observation[i]) { + categorical_process[i] = new CategoricalSequenceProcess(*categorical_observation[i]); + discrete_parametric_process[i] = NULL; + continuous_parametric_process[i] = NULL; + } + else if (discrete_parametric_observation[i]) { + categorical_process[i] = NULL; + discrete_parametric_process[i] = new DiscreteParametricProcess(*discrete_parametric_observation[i]); + continuous_parametric_process[i] = NULL; + } + else { + categorical_process[i] = NULL; + discrete_parametric_process[i] = NULL; + continuous_parametric_process[i] = new ContinuousParametricProcess(*continuous_parametric_observation[i]); + } + } + + if (length > COUNTING_MAX_LENGTH) { + counting_flag = false; + } + characteristic_computation(length , counting_flag); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the HiddenSemiMarkov class. + */ +/*--------------------------------------------------------------*/ + +HiddenSemiMarkov::~HiddenSemiMarkov() {} + + +/*--------------------------------------------------------------*/ +/** + * \brief Application of a threshold on the probability parameters of a hidden semi-Markov chain. + * + * \param[in] min_probability minimum probability. + * + * \return HiddenSemiMarkov object. 
+ */
+/*--------------------------------------------------------------*/
+
+HiddenSemiMarkov* HiddenSemiMarkov::thresholding(double min_probability) const
+
+{
+  // the threshold is applied on a copy; the current object is left unchanged
+
+  HiddenSemiMarkov *thresholded = new HiddenSemiMarkov(*this , false , false);
+
+  // parameters of the underlying chain
+
+  thresholded->Chain::thresholding(min_probability , true);
+
+  // categorical observation processes only: the other kinds of process are skipped
+
+  for (int process = 0;process < thresholded->nb_output_process;process++) {
+    CategoricalSequenceProcess *categorical = thresholded->categorical_process[process];
+
+    if (categorical) {
+      categorical->thresholding(min_probability);
+    }
+  }
+
+  return thresholded;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a HiddenSemiMarkov object from a file.
+ *
+ * \param[in] error           reference on a StatError object,
+ * \param[in] path            file path,
+ * \param[in] length          sequence length,
+ * \param[in] counting_flag   flag on the computation of the counting distributions,
+ * \param[in] cumul_threshold threshold on the cumulative parametric distribution functions,
+ * \param[in] old_format      flag format of the observation processes.
+ *
+ * \return HiddenSemiMarkov object.
+ */ +/*--------------------------------------------------------------*/ + +HiddenSemiMarkov* HiddenSemiMarkov::ascii_read(StatError &error , const string path , + int length , bool counting_flag , + double cumul_threshold , bool old_format) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + process_type type = DEFAULT_TYPE; + bool status , lstatus; + int i; + int line , nb_output_process , value , index; + observation_process obs_type; + const Chain *chain; + const CategoricalSequenceProcess *occupancy; + CategoricalProcess **categorical_observation; + DiscreteParametricProcess **discrete_parametric_observation; + ContinuousParametricProcess **continuous_parametric_observation; + HiddenSemiMarkov *hsmarkov; + ifstream in_file(path.c_str()); + + + hsmarkov = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test (EQUILIBRIUM_)HIDDEN_SEMI-MARKOV_CHAIN keyword + + if (i == 0) { + if (*token == SEQ_word[SEQW_HIDDEN_SEMI_MARKOV_CHAIN]) { + type = ORDINARY; + } + else if (*token == SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_SEMI_MARKOV_CHAIN]) { + type = EQUILIBRIUM; + } + else { + status = false; + ostringstream correction_message; + correction_message << SEQ_word[SEQW_HIDDEN_SEMI_MARKOV_CHAIN] << " or " + << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_SEMI_MARKOV_CHAIN]; + 
error.correction_update(STAT_parsing[STATP_KEYWORD] , (correction_message.str()).c_str() , line); + } + } + + i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (type != DEFAULT_TYPE) { + + // analysis of the format and reading of the Markov chain + + chain = Chain::parsing(error , in_file , line , type); + + if (chain) { + + // analysis of the format and reading of the state occupancy distributions + + occupancy = CategoricalSequenceProcess::occupancy_parsing(error , in_file , line , + *chain , cumul_threshold); + if (!occupancy) { + status = false; + } + + // analysis of the format and reading of the observation distributions + + if (old_format) { + categorical_observation = CategoricalProcess::old_parsing(error , in_file , line , + chain->nb_state , nb_output_process); + + if (categorical_observation) { + if (status) { + hsmarkov = new HiddenSemiMarkov(chain , occupancy , nb_output_process , + categorical_observation , length , counting_flag); + } + + for (i = 0;i < nb_output_process;i++) { + delete categorical_observation[i]; + } + delete [] categorical_observation; + } + } + + else { + nb_output_process = I_DEFAULT; + + categorical_observation = NULL; + discrete_parametric_observation = NULL; + continuous_parametric_observation = NULL; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { + + // test number of observation processes + + case 0 : { + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + if (lstatus) { + if ((value < 1) || (value > 
NB_OUTPUT_PROCESS)) { + lstatus = false; + } + else { + nb_output_process = value; + } + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_NB_OUTPUT_PROCESS] , line , i + 1); + } + break; + } + + // test OUTPUT_PROCESS(ES) keyword + + case 1 : { + if (*token != STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , + STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] , line , i + 1); + } + break; + } + } + + i++; + } + + if (i > 0) { + if (i != 2) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (nb_output_process == I_DEFAULT) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + else { + categorical_observation = new CategoricalProcess*[nb_output_process]; + discrete_parametric_observation = new DiscreteParametricProcess*[nb_output_process]; + continuous_parametric_observation = new ContinuousParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + categorical_observation[i] = NULL; + discrete_parametric_observation[i] = NULL; + continuous_parametric_observation[i] = NULL; + } + + index = 0; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { + + // test OUTPUT_PROCESS keyword + + case 0 : { + if (*token != STAT_word[STATW_OUTPUT_PROCESS]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_OUTPUT_PROCESS] , line , i + 1); + } + break; + } + + // test observation process index + + case 1 : { + index++; + lstatus = true; + +/* try { + value = 
stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + if ((lstatus) && ((value != index) || (value > nb_output_process))) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_OUTPUT_PROCESS_INDEX] , line , i + 1); + } + break; + } + + // test separator + + case 2 : { + if (*token != ":") { + status = false; + error.update(STAT_parsing[STATP_SEPARATOR] , line , i + 1); + } + break; + } + + // test CATEGORICAL/DISCRETE_PARAMETRIC/CONTINUOUS_PARAMETRIC keyword + + case 3 : { + if ((*token == STAT_word[STATW_CATEGORICAL]) || + (*token == STAT_word[STATW_NONPARAMETRIC])) { + obs_type = CATEGORICAL_PROCESS; + } + else if ((*token == STAT_word[STATW_DISCRETE_PARAMETRIC]) || + (*token == STAT_word[STATW_PARAMETRIC])) { + obs_type = DISCRETE_PARAMETRIC; + } + else if (*token == STAT_word[STATW_CONTINUOUS_PARAMETRIC]) { + obs_type = CONTINUOUS_PARAMETRIC; + } + else { + obs_type = DEFAULT_PROCESS; + status = false; + ostringstream correction_message; + correction_message << STAT_word[STATW_CATEGORICAL] << " or " + << STAT_word[STATW_DISCRETE_PARAMETRIC] << " or " + << STAT_word[STATW_CONTINUOUS_PARAMETRIC]; + error.correction_update(STAT_parsing[STATP_KEYWORD] , (correction_message.str()).c_str() , line , i + 1); + } + break; + } + } + + i++; + } + + if (i > 0) { + if (i != 4) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + switch (obs_type) { + + case CATEGORICAL_PROCESS : { + categorical_observation[index - 1] = CategoricalProcess::parsing(error , in_file , line , + chain->nb_state , + HIDDEN_MARKOV , true); +/* categorical_observation[index - 1] = CategoricalProcess::parsing(error , in_file , line , pour les donnees de suivi de croissance manguier + chain->nb_state , + HIDDEN_MARKOV , false); */ + if (!categorical_observation[index - 1]) { + status = false; + } + break; + } + + case DISCRETE_PARAMETRIC : { + 
discrete_parametric_observation[index - 1] = DiscreteParametricProcess::parsing(error , in_file , line , + chain->nb_state , + HIDDEN_MARKOV , + cumul_threshold); + if (!discrete_parametric_observation[index - 1]) { + status = false; + } + break; + } + + case CONTINUOUS_PARAMETRIC : { + continuous_parametric_observation[index - 1] = ContinuousParametricProcess::parsing(error , in_file , line , + chain->nb_state , + HIDDEN_MARKOV , + AUTOREGRESSIVE_MODEL); + if (!continuous_parametric_observation[index - 1]) { + status = false; + } + break; + } + } + } + + if (index == nb_output_process) { + break; + } + } + + if (index < nb_output_process) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + else { + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + if (!(trim_right_copy_if(buffer , is_any_of(" \t")).empty())) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + } + + if (status) { + hsmarkov = new HiddenSemiMarkov(chain , occupancy , nb_output_process , + categorical_observation , + discrete_parametric_observation , + continuous_parametric_observation , + length , counting_flag); + } + + for (i = 0;i < nb_output_process;i++) { + delete categorical_observation[i]; + delete discrete_parametric_observation[i]; + delete continuous_parametric_observation[i]; + } + delete [] categorical_observation; + delete [] discrete_parametric_observation; + delete [] continuous_parametric_observation; + } + } + + delete chain; + delete occupancy; + } + } + } + + return hsmarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a HiddenSemiMarkov object in a file. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& HiddenSemiMarkov::ascii_write(ostream &os , bool exhaustive) const + +{ + SemiMarkov::ascii_write(os , semi_markov_data , exhaustive , + false , true); + +// os << "\nEnd state: " << end_state() << endl; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a HiddenSemiMarkov object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + SemiMarkov::ascii_write(out_file , semi_markov_data , exhaustive , + true , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a HiddenSemiMarkov object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::spreadsheet_write(StatError &error , const string path) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + SemiMarkov::spreadsheet_write(out_file , semi_markov_data , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Search for an end state. + * + * \return end state index. 
+ */ +/*--------------------------------------------------------------*/ + +int HiddenSemiMarkov::end_state() const + +{ + int i , j , k; + int end_state = I_DEFAULT , output; + + + for (i = nb_state - 1;i >= 0;i--) { + if (stype[i] == ABSORBING) { + +# ifdef DEBUG + cout << "\nstate: " << i << " | "; +# endif + + end_state = i; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + for (k = categorical_process[j]->observation[i]->offset;k < categorical_process[j]->observation[i]->nb_value;k++) { + if (categorical_process[j]->observation[i]->mass[k] == 1.) { + output = k; + +# ifdef DEBUG + cout << "output: " << output << " | "; +# endif + + break; + } + } + + if (k < categorical_process[j]->observation[i]->nb_value) { + for (k = 0;k < nb_state;k++) { + if ((k != i) && (output >= categorical_process[j]->observation[k]->offset) && + (output < categorical_process[j]->observation[k]->nb_value) && + (categorical_process[j]->observation[k]->mass[output] > 0.)) { + end_state = I_DEFAULT; + break; + } + } + if (end_state == I_DEFAULT) { + break; + } + } + + else { + end_state = I_DEFAULT; + break; + } + } + + else { + for (k = discrete_parametric_process[j]->observation[i]->offset;k < discrete_parametric_process[j]->observation[i]->nb_value;k++) { + if (discrete_parametric_process[j]->observation[i]->mass[k] == 1.) 
{ + output = k; + break; + } + } + + if (k < discrete_parametric_process[j]->observation[i]->nb_value) { + for (k = 0;k < nb_state;k++) { + if ((k != i) && (output >= discrete_parametric_process[j]->observation[k]->offset) && + (output < discrete_parametric_process[j]->observation[k]->nb_value) && + (discrete_parametric_process[j]->observation[k]->mass[output] > 0.)) { + end_state = I_DEFAULT; + break; + } + } + if (end_state == I_DEFAULT) { + break; + } + } + + else { + end_state = I_DEFAULT; + break; + } + } + } + +# ifdef DEBUG + cout << "end state: " << end_state << endl; +# endif + + if (end_state == i) { + break; + } + } + } + + return end_state; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/hidden_semi_markov.h b/src/cpp/sequence_analysis/hidden_semi_markov.h new file mode 100644 index 0000000..755c3c9 --- /dev/null +++ b/src/cpp/sequence_analysis/hidden_semi_markov.h @@ -0,0 +1,222 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef HIDDEN_SEMI_MARKOV_H +#define HIDDEN_SEMI_MARKOV_H + + +#include "semi_markov.h" + + +namespace sequence_analysis { + + + +/**************************************************************** + * + * Constants + */ + + + const double SEMI_MARKOV_LIKELIHOOD_DIFF = 1.e-6; // threshold for stopping the EM iterations + const int EXPLORATION_NB_ITER = 10; // number of iterations, exploration phase + const int STOCHASTIC_EXPLORATION_NB_ITER = 5; // number of iterations, exploration phase (MCEM algorithm) + const int SEMI_MARKOV_NB_ITER = 500; // maximum number of EM iterations + + const double MIN_SMOOTHED_PROBABILITY = 1.e-3; // threshold on the smoothed probabilities + + enum state_profile { + SSTATE , // state + IN_STATE , // state entering + OUT_STATE // state exit + }; + + + +/**************************************************************** + * + * Class definition + */ + + + /// \brief Hidden semi-Markov chain + + class HiddenSemiMarkov : public SemiMarkov { + + friend class MarkovianSequences; + + friend std::ostream& operator<<(std::ostream &os , const HiddenSemiMarkov &hsmarkov) + { return hsmarkov.ascii_write(os); } + + private : + + HiddenSemiMarkov(stat_tool::process_type itype , int inb_state , int inb_output_process , int *nb_value) + :SemiMarkov(itype , inb_state , inb_output_process , nb_value) {} + + int end_state() const; + + void forward_backward(SemiMarkovData &seq) const; + double forward_backward(MarkovianSequences &seq , int index , std::ostream *os , + stat_tool::MultiPlotSet *plot_set , + state_profile output , stat_tool::output_format format , + double &max_marginal_entropy , double &entropy1) const; + 
double forward_backward_sampling(const MarkovianSequences &seq , int index , std::ostream &os , + stat_tool::output_format format = stat_tool::ASCII , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + + void log_computation(); + double viterbi(const MarkovianSequences &seq , double *posterior_probability , + int index = stat_tool::I_DEFAULT) const; + void viterbi(SemiMarkovData &seq) const; + double generalized_viterbi(const MarkovianSequences &seq , int index , std::ostream &os , + double seq_likelihood , stat_tool::output_format format , + int inb_state_sequence) const; + double viterbi_forward_backward(const MarkovianSequences &seq , int index , + std::ostream *os , stat_tool::MultiPlot *plot , + state_profile output , stat_tool::output_format format , + double seq_likelihood = stat_tool::D_INF) const; + + bool state_profile_write(StatError &error , std::ostream &os , const MarkovianSequences &iseq , + int identifier , state_profile output = SSTATE , + stat_tool::output_format format = stat_tool::ASCII , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + + public : + + HiddenSemiMarkov() {} + HiddenSemiMarkov(const Chain *pchain , const CategoricalSequenceProcess *poccupancy , + int inb_output_process , stat_tool::CategoricalProcess **pobservation , + int length , bool counting_flag) + :SemiMarkov(pchain , poccupancy , inb_output_process , pobservation , length , + counting_flag) {} + HiddenSemiMarkov(const Chain *pchain , const CategoricalSequenceProcess *poccupancy , + int inb_output_process , stat_tool::CategoricalProcess **categorical_observation , + stat_tool::DiscreteParametricProcess **discrete_parametric_observation , + stat_tool::ContinuousParametricProcess **continuous_parametric_observation , + int length , bool counting_flag) + :SemiMarkov(pchain , poccupancy , inb_output_process , categorical_observation , + discrete_parametric_observation , + 
continuous_parametric_observation , length , counting_flag) {} + HiddenSemiMarkov(const HiddenSemiMarkov &hsmarkov , bool data_flag = true , + int param = stat_tool::I_DEFAULT) + :SemiMarkov(hsmarkov , data_flag , param) {} + ~HiddenSemiMarkov(); + + HiddenSemiMarkov* thresholding(double min_probability = MIN_PROBABILITY) const; + + static HiddenSemiMarkov* ascii_read(StatError &error , const std::string path , + int length = DEFAULT_LENGTH , + bool counting_flag = true , + double cumul_threshold = OCCUPANCY_THRESHOLD , + bool old_format = false); + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(StatError &error , const std::string path) const; + + double likelihood_computation(const MarkovianSequences &seq , double *posterior_probability = NULL , + int index = stat_tool::I_DEFAULT) const; + + bool state_profile_ascii_write(StatError &error , std::ostream &os , const MarkovianSequences &iseq , + int identifier , state_profile output = SSTATE , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + bool state_profile_write(StatError &error , const std::string path , const MarkovianSequences &iseq , + int identifier , state_profile output = SSTATE , + stat_tool::output_format format = stat_tool::ASCII , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + bool state_profile_ascii_write(StatError &error , std::ostream &os , int identifier , + state_profile output = SSTATE , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + bool state_profile_write(StatError &error , const std::string path , + int identifier , state_profile output = SSTATE , + stat_tool::output_format format = stat_tool::ASCII , + 
latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + + bool state_profile_plot_write(StatError &error , const char *prefix , + const MarkovianSequences &iseq , int identifier , + state_profile output = SSTATE , const char *title = NULL) const; + bool state_profile_plot_write(StatError &error , const char *prefix , int identifier , + state_profile output = SSTATE , const char *title = NULL) const; + + stat_tool::MultiPlotSet* state_profile_plotable_write(StatError &error , + const MarkovianSequences &iseq , + int identifier , state_profile output = SSTATE) const; + stat_tool::MultiPlotSet* state_profile_plotable_write(StatError &error , + int identifier , state_profile output = SSTATE) const; + + SemiMarkovData* state_sequence_computation(StatError &error , std::ostream *os , + const MarkovianSequences &seq , + bool characteristic_flag = true) const; + + SemiMarkovData* simulation(StatError &error , const FrequencyDistribution &hlength , + bool counting_flag = true , bool divergence_flag = false) const; + SemiMarkovData* simulation(StatError &error , int nb_sequence , + int length , bool counting_flag = true) const; + SemiMarkovData* simulation(StatError &error , int nb_sequence , + const MarkovianSequences &iseq , bool counting_flag = true) const; + + /// simulation of semi-markov-switching linear models, which require a single int covariate. 
+ SemiMarkovData* semi_markov_switching_lm_simulation(StatError &error , int nb_sequence , + const Sequences &covariate, + int ivariable=I_DEFAULT, bool counting_flag = true) const; + + + + stat_tool::DistanceMatrix* divergence_computation(StatError &error , std::ostream *os , int nb_model , + const HiddenSemiMarkov **ihsmarkov , + stat_tool::FrequencyDistribution **hlength , + const std::string path = "") const; + stat_tool::DistanceMatrix* divergence_computation(StatError &error , std::ostream *os , int nb_model , + const HiddenSemiMarkov **hsmarkov , int nb_sequence , + int length , const std::string path = "") const; + stat_tool::DistanceMatrix* divergence_computation(StatError &error , std::ostream *os , int nb_model , + const HiddenSemiMarkov **hsmarkov , int nb_sequence , + const MarkovianSequences **seq , const std::string path = "") const; + }; + + +}; // namespace sequence_analysis + + + +#endif diff --git a/src/cpp/sequence_analysis/hidden_variable_order_markov.cpp b/src/cpp/sequence_analysis/hidden_variable_order_markov.cpp new file mode 100644 index 0000000..534e629 --- /dev/null +++ b/src/cpp/sequence_analysis/hidden_variable_order_markov.cpp @@ -0,0 +1,649 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include +#include + +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "hidden_variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkov class. + * + * \param[in] pmarkov pointer on a VariableOrderMarkovChain object, + * \param[in] inb_output_process number of observation processes, + * \param[in] categorical_observation pointer on CategoricalProcess objects, + * \param[in] discrete_parametric_observation pointer on DiscreteParametricProcess objects, + * \param[in] continuous_parametric_observation pointer on ContinuousParametricProcess objects, + * \param[in] length sequence length. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov::VariableOrderMarkov(const VariableOrderMarkovChain *pmarkov , int inb_output_process , + CategoricalProcess **categorical_observation , + DiscreteParametricProcess **discrete_parametric_observation , + ContinuousParametricProcess **continuous_parametric_observation , + int length) + +{ + int i; + + + build(*pmarkov); + + nb_iterator = 0; + markov_data = NULL; + + nb_output_process = inb_output_process; + + categorical_process = new CategoricalSequenceProcess*[nb_output_process]; + discrete_parametric_process = new DiscreteParametricProcess*[nb_output_process]; + continuous_parametric_process = new ContinuousParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + if (categorical_observation[i]) { + categorical_process[i] = new CategoricalSequenceProcess(*categorical_observation[i]); + discrete_parametric_process[i] = NULL; + continuous_parametric_process[i] = NULL; + } + else if (discrete_parametric_observation[i]) { + categorical_process[i] = NULL; + discrete_parametric_process[i] = new DiscreteParametricProcess(*discrete_parametric_observation[i]); + continuous_parametric_process[i] = NULL; + } + else { + categorical_process[i] = NULL; + discrete_parametric_process[i] = NULL; + continuous_parametric_process[i] = new ContinuousParametricProcess(*continuous_parametric_observation[i]); + } + } + + characteristic_computation(length , true); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the HiddenVariableOrderMarkov class. + */ +/*--------------------------------------------------------------*/ + +HiddenVariableOrderMarkov::~HiddenVariableOrderMarkov() {} + + +/*--------------------------------------------------------------*/ +/** + * \brief Application of a threshold on the probability parameters of + * a hidden variable-order Markov chain. 
+ * + * \param[in] min_probability minimum probability. + * + * \return HiddenVariableOrderMarkov object. + */ +/*--------------------------------------------------------------*/ + +HiddenVariableOrderMarkov* HiddenVariableOrderMarkov::thresholding(double min_probability) const + +{ + int i; + HiddenVariableOrderMarkov *hmarkov; + + + hmarkov = new HiddenVariableOrderMarkov(*this , false); + hmarkov->VariableOrderMarkovChain::thresholding(min_probability); + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->categorical_process[i]) { + hmarkov->categorical_process[i]->thresholding(min_probability); + } + } + + return hmarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a HiddenVariableOrderMarkov object from a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] length sequence length, + * \param[in] cumul_threshold threshold on the cumulative parametric distribution functions. + * + * \return HiddenVariableOrderMarkov object. 
+ */ +/*--------------------------------------------------------------*/ + +HiddenVariableOrderMarkov* HiddenVariableOrderMarkov::ascii_read(StatError &error , + const string path , int length , + double cumul_threshold) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + process_type type = DEFAULT_TYPE; + bool status , lstatus; + int i; + int line , nb_output_process , value , index; + observation_process obs_type; + const VariableOrderMarkovChain *imarkov; + CategoricalProcess **categorical_observation; + DiscreteParametricProcess **discrete_parametric_observation; + ContinuousParametricProcess **continuous_parametric_observation; + HiddenVariableOrderMarkov *hmarkov; + ifstream in_file(path.c_str()); + + + hmarkov = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test (EQUILIBRIUM_)HIDDEN_MARKOV_CHAIN keyword + + if (i == 0) { + if (*token == SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN]) { + type = ORDINARY; + } + else if (*token == SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN]) { + type = EQUILIBRIUM; + } + else { + status = false; + ostringstream correction_message; + correction_message << SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN] << " or " + << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN]; + error.correction_update(STAT_parsing[STATP_KEYWORD] , + 
(correction_message.str()).c_str() , line); + } + } + + i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (type != DEFAULT_TYPE) { + + // analysis of the format and reading of the variable-order Markov chain + + imarkov = VariableOrderMarkovChain::parsing(error , in_file , line , type); + + // analysis of the format and reading of the observation distributions + + if (imarkov) { + nb_output_process = I_DEFAULT; + + categorical_observation = NULL; + discrete_parametric_observation = NULL; + continuous_parametric_observation = NULL; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { + + // test number of observation processes + + case 0 : { + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + if (lstatus) { + if ((value < 1) || (value > NB_OUTPUT_PROCESS)) { + lstatus = false; + } + else { + nb_output_process = value; + } + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_NB_OUTPUT_PROCESS] , line , i + 1); + } + break; + } + + // test OUTPUT_PROCESS(ES) keyword + + case 1 : { + if (*token != STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , + STAT_word[nb_output_process == 1 ? 
STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] , line , i + 1); + } + break; + } + } + + i++; + } + + if (i > 0) { + if (i != 2) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (nb_output_process == I_DEFAULT) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + else { + categorical_observation = new CategoricalProcess*[nb_output_process]; + discrete_parametric_observation = new DiscreteParametricProcess*[nb_output_process]; + continuous_parametric_observation = new ContinuousParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + categorical_observation[i] = NULL; + discrete_parametric_observation[i] = NULL; + continuous_parametric_observation[i] = NULL; + } + + index = 0; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { + + // test OUTPUT_PROCESS keyword + + case 0 : { + if (*token != STAT_word[STATW_OUTPUT_PROCESS]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_OUTPUT_PROCESS] , line , i + 1); + } + break; + } + + // test observation process index + + case 1 : { + index++; + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + if ((lstatus) && ((value != index) || (value > nb_output_process))) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_OUTPUT_PROCESS_INDEX] , line , i + 1); + } + break; + } + + // test separator + + case 2 : { + if (*token != ":") { + status = false; + error.update(STAT_parsing[STATP_SEPARATOR] , line , i + 1); + } 
+ break; + } + + // test CATEGORICAL/DISCRETE_PARAMETRIC/CONTINUOUS_PARAMETRIC keyword + + case 3 : { + if ((*token == STAT_word[STATW_CATEGORICAL]) || + (*token == STAT_word[STATW_NONPARAMETRIC])) { + obs_type = CATEGORICAL_PROCESS; + } + else if ((*token == STAT_word[STATW_DISCRETE_PARAMETRIC]) || + (*token == STAT_word[STATW_PARAMETRIC])) { + obs_type = DISCRETE_PARAMETRIC; + } + else if (*token == STAT_word[STATW_CONTINUOUS_PARAMETRIC]) { + obs_type = CONTINUOUS_PARAMETRIC; + } + else { + obs_type = DEFAULT_PROCESS; + status = false; + ostringstream correction_message; + correction_message << STAT_word[STATW_CATEGORICAL] << " or " + << STAT_word[STATW_DISCRETE_PARAMETRIC] << " or " + << STAT_word[STATW_CONTINUOUS_PARAMETRIC]; + error.correction_update(STAT_parsing[STATP_KEYWORD] , (correction_message.str()).c_str() , line , i + 1); + } + break; + } + } + + i++; + } + + if (i > 0) { + if (i != 4) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + switch (obs_type) { + + case CATEGORICAL_PROCESS : { + categorical_observation[index - 1] = CategoricalProcess::parsing(error , in_file , line , + ((Chain*)imarkov)->nb_state , + HIDDEN_MARKOV , true); + if (!categorical_observation[index - 1]) { + status = false; + } + break; + } + + case DISCRETE_PARAMETRIC : { + discrete_parametric_observation[index - 1] = DiscreteParametricProcess::parsing(error , in_file , line , + ((Chain*)imarkov)->nb_state , + HIDDEN_MARKOV , + cumul_threshold); + if (!discrete_parametric_observation[index - 1]) { + status = false; + } + break; + } + + case CONTINUOUS_PARAMETRIC : { + continuous_parametric_observation[index - 1] = ContinuousParametricProcess::parsing(error , in_file , line , + ((Chain*)imarkov)->nb_state , + HIDDEN_MARKOV , + ZERO_INFLATED_GAMMA); + if (!continuous_parametric_observation[index - 1]) { + status = false; + } + break; + } + } + } + + if (index == nb_output_process) { + break; + } + } + + if (index < nb_output_process) { + status = false; 
+ error.update(STAT_parsing[STATP_FORMAT] , line); + } + + else { + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + if (!(trim_right_copy_if(buffer , is_any_of(" \t")).empty())) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + } + + if (status) { + hmarkov = new HiddenVariableOrderMarkov(imarkov , nb_output_process , + categorical_observation , + discrete_parametric_observation , + continuous_parametric_observation , length); + +# ifdef DEBUG + hmarkov->ascii_write(cout); +# endif + + } + + delete imarkov; + + for (i = 0;i < nb_output_process;i++) { + delete categorical_observation[i]; + delete discrete_parametric_observation[i]; + delete continuous_parametric_observation[i]; + } + delete [] categorical_observation; + delete [] discrete_parametric_observation; + delete [] continuous_parametric_observation; + } + } + } + } + + return hmarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a HiddenVariableOrderMarkov object in a file. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& HiddenVariableOrderMarkov::ascii_write(ostream &os , bool exhaustive) const + +{ + VariableOrderMarkov::ascii_write(os , markov_data , exhaustive , false , true); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a HiddenVariableOrderMarkov object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenVariableOrderMarkov::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + VariableOrderMarkov::ascii_write(out_file , markov_data , exhaustive , true , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a HiddenVariableOrderMarkov object in a file + * at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool HiddenVariableOrderMarkov::spreadsheet_write(StatError &error , + const string path) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + VariableOrderMarkov::spreadsheet_write(out_file , markov_data , true); + } + + return status; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/hidden_variable_order_markov.h b/src/cpp/sequence_analysis/hidden_variable_order_markov.h new file mode 100644 index 0000000..6346ff0 --- /dev/null +++ b/src/cpp/sequence_analysis/hidden_variable_order_markov.h @@ -0,0 +1,189 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * 
GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef HIDDEN_VARIABLE_ORDER_MARKOV_H +#define HIDDEN_VARIABLE_ORDER_MARKOV_H + + +#include "variable_order_markov.h" + + +namespace sequence_analysis { + + + +/**************************************************************** + * + * Constants + */ + + + const double VARIABLE_ORDER_MARKOV_LIKELIHOOD_DIFF = 1.e-6; // threshold for stopping the EM iterations + const int VARIABLE_ORDER_MARKOV_NB_ITER = 100; // maximum number of EM iterations + + + +/**************************************************************** + * + * Class definition + */ + + + /// \brief hidden variable-order Markov chain + + class HiddenVariableOrderMarkov : public VariableOrderMarkov { + + friend class MarkovianSequences; + + friend std::ostream& operator<<(std::ostream &os , const HiddenVariableOrderMarkov &hmarkov) + { return hmarkov.ascii_write(os); } + + private : + + void forward_backward(VariableOrderMarkovData &seq) const; + double forward_backward(MarkovianSequences &seq , int index , std::ostream *os , + stat_tool::MultiPlotSet *plot_set , stat_tool::output_format format , + double &max_marginal_entropy , double 
&entropy1) const; + double forward_backward_sampling(const MarkovianSequences &seq , int index , std::ostream &os , + stat_tool::output_format format = stat_tool::ASCII , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + + void log_computation(); + double viterbi(const MarkovianSequences &seq , double *posterior_probability , + int index = stat_tool::I_DEFAULT) const; + void viterbi(VariableOrderMarkovData &seq) const; + double generalized_viterbi(const MarkovianSequences &seq , int index , std::ostream &os , + double seq_likelihood , stat_tool::output_format format , + int inb_state_sequence) const; + double viterbi_forward_backward(const MarkovianSequences &seq , int index , std::ostream *os , + stat_tool::MultiPlot *plot , stat_tool::output_format format , + double seq_likelihood = D_INF) const; + + bool state_profile_write(stat_tool::StatError &error , std::ostream &os , + const MarkovianSequences &iseq , int identifier , + stat_tool::output_format format = stat_tool::ASCII , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + + public : + + HiddenVariableOrderMarkov() {} + HiddenVariableOrderMarkov(const VariableOrderMarkovChain *pmarkov , int inb_output_process , + stat_tool::CategoricalProcess **categorical_observation , + stat_tool::DiscreteParametricProcess **discrete_parametric_observation , + stat_tool::ContinuousParametricProcess **continuous_parametric_observation , + int length) + :VariableOrderMarkov(pmarkov , inb_output_process , categorical_observation , + discrete_parametric_observation , + continuous_parametric_observation , length) {} + HiddenVariableOrderMarkov(const HiddenVariableOrderMarkov &hmarkov , bool data_flag = true) + :VariableOrderMarkov(hmarkov , data_flag) {} + ~HiddenVariableOrderMarkov(); + + HiddenVariableOrderMarkov* thresholding(double min_probability = MIN_PROBABILITY) const; + + static HiddenVariableOrderMarkov* ascii_read(stat_tool::StatError &error , 
const std::string path , + int length = DEFAULT_LENGTH , + double cumul_threshold = OCCUPANCY_THRESHOLD); + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + + double likelihood_computation(const MarkovianSequences &seq , double *posterior_probability = NULL , + int index = stat_tool::I_DEFAULT) const; + + bool state_profile_ascii_write(StatError &error , std::ostream &os , const MarkovianSequences &iseq , + int identifier , latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + bool state_profile_write(stat_tool::StatError &error , const std::string path , + const MarkovianSequences &iseq , int identifier , + stat_tool::output_format format = stat_tool::ASCII , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + bool state_profile_ascii_write(stat_tool::StatError &error , std::ostream &os , int identifier , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + bool state_profile_write(stat_tool::StatError &error , const std::string path , int identifier , + stat_tool::output_format format = stat_tool::ASCII , + latent_structure_algorithm state_sequence = GENERALIZED_VITERBI , + int nb_state_sequence = NB_STATE_SEQUENCE) const; + + bool state_profile_plot_write(stat_tool::StatError &error , const char *prefix , + const MarkovianSequences &iseq , + int identifier , const char *title = NULL) const; + bool state_profile_plot_write(stat_tool::StatError &error , const char *prefix , + int identifier , const char *title = NULL) const; + + stat_tool::MultiPlotSet* state_profile_plotable_write(StatError &error , + const MarkovianSequences &iseq , + int 
identifier) const; + stat_tool::MultiPlotSet* state_profile_plotable_write(StatError &error , + int identifier) const; + + VariableOrderMarkovData* state_sequence_computation(stat_tool::StatError &error , + const MarkovianSequences &iseq , + bool characteristic_flag = true) const; + + VariableOrderMarkovData* simulation(stat_tool::StatError &error , + const stat_tool::FrequencyDistribution &hlength , + bool counting_flag = true , bool divergence_flag = false) const; + VariableOrderMarkovData* simulation(stat_tool::StatError &error , int nb_sequence , + int length , bool counting_flag = true) const; + VariableOrderMarkovData* simulation(stat_tool::StatError &error , int nb_sequence , + const MarkovianSequences &iseq , + bool counting_flag = true) const; + + stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model , + const HiddenVariableOrderMarkov **ihmarkov , + FrequencyDistribution **hlength , + const std::string path = "") const; + stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model , + const HiddenVariableOrderMarkov **hmarkov , int nb_sequence , + int length , const std::string path = "") const; + stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model , + const HiddenVariableOrderMarkov **hmarkov , int nb_sequence , + const MarkovianSequences **seq , const std::string path = "") const; + }; + + +}; // namespace sequence_analysis + + + +#endif diff --git a/src/cpp/sequence_analysis/hsmc_algorithms1.cpp b/src/cpp/sequence_analysis/hsmc_algorithms1.cpp new file mode 100644 index 0000000..1c9ebf6 --- /dev/null +++ b/src/cpp/sequence_analysis/hsmc_algorithms1.cpp @@ -0,0 +1,4059 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD 
AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include + +#include "stat_tool/stat_label.h" + +#include "stat_tool/distribution_reestimation.hpp" // problem compiler C++ Windows + +#include "hidden_semi_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood of a hidden semi-Markov chain for sequences + * using the forward algorithm. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] posterior_probability pointer on the posterior probabilities of the most probable state sequences, + * \param[in] index sequence index. + * + * \return log-likelihood. 
+ */ +/*--------------------------------------------------------------*/ + +double HiddenSemiMarkov::likelihood_computation(const MarkovianSequences &seq , + double *posterior_probability , int index) const + +{ + int i , j , k , m; + int nb_value , length , **pioutput; + double likelihood = 0. , seq_likelihood = 0. , obs_product = 0. , residual = 0., **observation = NULL, + *norm = NULL, *state_norm = NULL, *forward1 = NULL, **state_in = NULL, **proutput = NULL; + DiscreteParametric *occupancy = NULL; + + + // checking of the compatibility of the model with the data: + // the number of output processes must equal seq.nb_variable and each categorical or discrete parametric + // process must have at least as many values as the marginal distribution of its variable; + // otherwise D_INF is returned without any further computation + + if (nb_output_process == seq.nb_variable) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < seq.marginal_distribution[i]->nb_value) { + likelihood = D_INF; + break; + } + } + } + } + + else { + likelihood = D_INF; + } + + if (likelihood != D_INF) { + + // initializations: the work buffers are sized with seq.max_length when index == I_DEFAULT + // and with seq.length[index] otherwise + + length = (index == I_DEFAULT ? 
seq.max_length : seq.length[index]); + + observation = new double*[length]; + for (i = 0;i < length;i++) { + observation[i] = new double[nb_state]; + } + + norm = new double[length]; + for (i = 0;i < length;i++) { + norm[i] = 0.; + } + + state_norm = new double[nb_state]; + forward1 = new double[nb_state]; + + state_in = new double*[length - 1]; + for (i = 0;i < length - 1;i++) { + state_in[i] = new double[nb_state]; + } + + // pioutput[v] / proutput[v]: cursors on the current integer / real value of variable v, + // set to the beginning of sequence i and incremented once per index j + + pioutput = new int*[seq.nb_variable]; + proutput = new double*[seq.nb_variable]; + + for (i = 0;i < seq.nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < seq.nb_variable;j++) { + switch (seq.type[j]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j]; + break; + } + } + seq_likelihood = 0.; + + for (j = 0;j < seq.length[i];j++) { + norm[j] = 0.; + + for (k = 0;k < nb_state;k++) { + + // computation of the observation probabilities: + // observation[j][k] starts at 1. and is multiplied, for each output process, by the term given by + // the observation distribution of state k for the value read at index j + + observation[j][k] = 1.; + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + observation[j][k] *= categorical_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + observation[j][k] *= discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m] < seq.min_interval[m] / 2)) { + switch (seq.type[m]) { + case INT_VALUE : + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + 
seq.min_interval[m]); + break; + case REAL_VALUE : + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m]); + break; + } + } + + else if (continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (seq.type[m]) { + case INT_VALUE : + residual = 
*pioutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + } + + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + // at index 0 the residual is the deviation of the value from location; at the following indices + // the deviation of the previous value from location, multiplied by autoregressive_coeff, is also subtracted + if (j == 0) { + switch (seq.type[m]) { + case INT_VALUE : + residual = *pioutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (seq.type[m]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else { + switch (seq.type[m]) { + case INT_VALUE : + observation[j][k] *= 
continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - seq.min_interval[m] / 2 , *pioutput[m] + seq.min_interval[m] / 2); + break; + case REAL_VALUE : + observation[j][k] *= 
continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - seq.min_interval[m] / 2 , *proutput[m] + seq.min_interval[m] / 2); + break; + } + } + } + } + + switch (sojourn_type[k]) { + + // case semi-Markovian state: + // state_norm[k] is updated from its value at the previous index by adding state_in[j - 1][k] and + // subtracting the previous forward1[k], then multiplied by observation[j][k] + + case SEMI_MARKOVIAN : { + if (j == 0) { + state_norm[k] = initial[k]; + } + else { + state_norm[k] += state_in[j - 1][k] - forward1[k]; + } + state_norm[k] *= observation[j][k]; + + norm[j] += state_norm[k]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (j == 0) { + forward1[k] = initial[k]; + } + else { + forward1[k] = state_in[j - 1][k]; + } + forward1[k] *= observation[j][k]; + + norm[j] += forward1[k]; + break; + } + } + } + + // norm[j]: sum over the states of the quantities accumulated above; each of them is divided by norm[j] + // and log(norm[j]) is added to the log-likelihood of the sequence; + // a norm[j] that is not strictly positive sets the sequence log-likelihood to D_INF and stops the recurrence + + if (norm[j] > 0.) { + for (k = 0;k < nb_state;k++) { + switch (sojourn_type[k]) { + case SEMI_MARKOVIAN : + state_norm[k] /= norm[j]; + break; + case MARKOVIAN : + forward1[k] /= norm[j]; + break; + } + } + + seq_likelihood += log(norm[j]); + } + + else { + seq_likelihood = D_INF; + break; + } + + for (k = 0;k < nb_state;k++) { + + // case semi-Markovian state: + // forward1[k] is a sum over the sojourn lengths m ending at index j; the sojourn-time mass is used + // before the last index of the sequence and 1 - cumul[m - 1] at the last index; + // when m == j + 1 the sojourn starts at index 0 and initial[k] replaces state_in[j - m][k] + // (with the forward[k] distribution instead of the occupancy distribution for the EQUILIBRIUM type) + + if (sojourn_type[k] == SEMI_MARKOVIAN) { + occupancy = 
state_process->sojourn_time[k]; + obs_product = 1.; + forward1[k] = 0.; + + if (j < seq.length[i] - 1) { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < j + 1) { + forward1[k] += obs_product * occupancy->mass[m] * state_in[j - m][k]; + } + + else { + switch (type) { + case ORDINARY : + forward1[k] += obs_product * occupancy->mass[m] * initial[k]; + break; + case EQUILIBRIUM : + forward1[k] += obs_product * forward[k]->mass[m] * initial[k]; + break; + } + } + } + } + + else { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < j + 1) { + forward1[k] += obs_product * (1. 
- occupancy->cumul[m - 1]) * state_in[j - m][k]; + } + + else { + switch (type) { + case ORDINARY : + forward1[k] += obs_product * (1. - occupancy->cumul[m - 1]) * initial[k]; + break; + case EQUILIBRIUM : + forward1[k] += obs_product * (1. - forward[k]->cumul[m - 1]) * initial[k]; + break; + } + } + } + } + } + } + + // state_in[j][k]: sum over the states m of transition[m][k] * forward1[m] (not computed at the last index) + + if (j < seq.length[i] - 1) { + for (k = 0;k < nb_state;k++) { + state_in[j][k] = 0.; + for (m = 0;m < nb_state;m++) { + state_in[j][k] += transition[m][k] * forward1[m]; + } + } + } + + for (k = 0;k < seq.nb_variable;k++) { + switch (seq.type[k]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + } + + // the value passed in posterior_probability[i] is replaced by exp(value - seq_likelihood); + // a sequence log-likelihood equal to D_INF makes the global result D_INF and stops the loop on the sequences + + if (seq_likelihood != D_INF) { + likelihood += seq_likelihood; + if (posterior_probability) { + posterior_probability[i] = exp(posterior_probability[i] - seq_likelihood); + } + } + + else { + likelihood = D_INF; + break; + } + } + } + + // release of the work buffers + + for (i = 0;i < length;i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] norm; + delete [] state_norm; + delete [] forward1; + + for (i = 0;i < length - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] pioutput; + delete [] proutput; + } + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a hidden semi-Markov chain using the EM algorithm. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] ihsmarkov initial hidden semi-Markov chain, + * \param[in] geometric_poisson flag on the estimation of Poisson geometric state occupancy distributions, + * \param[in] common_dispersion flag common dispersion parameter (continuous observation processes), + * \param[in] estimator estimator type for the reestimation of the state occupancy distributions + * (complete or partial likelihood), + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] state_sequence flag on the computation of the restored state sequences, + * \param[in] nb_iter number of iterations, + * \param[in] mean_estimator method for the computation of the state occupancy + * distribution mean (equilibrium semi-Markov chain). + * + * \return HiddenSemiMarkov object. + */ +/*--------------------------------------------------------------*/ + +HiddenSemiMarkov* MarkovianSequences::hidden_semi_markov_estimation(StatError &error , ostream *os , + const HiddenSemiMarkov &ihsmarkov , + bool geometric_poisson , bool common_dispersion , + censoring_estimator estimator , bool counting_flag , + bool state_sequence , int nb_iter , + duration_distribution_mean_estimator mean_estimator) const + +{ + bool status, reload_prev_optimal = false; + int i , j , k , m , n; + int max_nb_value , iter , nb_likelihood_decrease , offset , nb_value , *occupancy_nb_value = NULL, + *censored_occupancy_nb_value = NULL, **pioutput = NULL; + double likelihood = D_INF , previous_likelihood , occupancy_likelihood , observation_likelihood , + min_likelihood , obs_product , residual , buff , sum , occupancy_mean , **observation , + *norm = NULL, *state_norm = NULL, **forward1 = NULL, **state_in = NULL, *backward = NULL, + **backward1 = NULL, *auxiliary = NULL, *ofrequency = NULL, *lfrequency = NULL, + *occupancy_survivor = NULL, 
*censored_occupancy_survivor = NULL, diff , + variance , **mean_direction = NULL, global_mean_direction , concentration , ***state_sequence_count = NULL, + **proutput = NULL; + double *complete_occupancy_weight = NULL, *censored_occupancy_weight = NULL; + Distribution *weight = NULL; + DiscreteParametric *occupancy = NULL; + ChainReestimation *chain_reestim = NULL; + Reestimation **occupancy_reestim = NULL, **length_bias_reestim = NULL, + **censored_occupancy_reestim = NULL, ***observation_reestim = NULL; + FrequencyDistribution *hoccupancy = NULL, *hobservation = NULL; + HiddenSemiMarkov *hsmarkov = NULL, *hsmarkov_best = NULL; + SemiMarkovData *seq = NULL; + +# ifdef DEBUG + double test[NB_STATE][4]; +# endif + + + hsmarkov = NULL; + error.init(); + + // EM structure: test compatibility between data and initial model + + status = false; + for (i = 0;i < nb_variable;i++) { + if (max_value[i] > min_value[i]) { + status = true; + break; + } + } + + if (!status) { + error.update(STAT_error[STATR_VARIABLE_NB_VALUE]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } +# ifdef DEBUG + if (type[i] == STATE) + cout << "Warning: " << STAT_label[STATL_VARIABLE] << " " << i + 1 << " has type " << STAT_variable_word[STATE]; +# endif +# ifdef MESSAGE + if (type[i] == STATE) + cout << "Warning: " << STAT_label[STATL_VARIABLE] << " " << i + 1 << " has type " << STAT_variable_word[STATE]; +# endif + + } + + if (ihsmarkov.nb_output_process != nb_variable) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + 
else { + for (i = 0;i < nb_variable;i++) { + if ((ihsmarkov.categorical_process[i]) || (ihsmarkov.discrete_parametric_process[i])) { + if (type[i] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (((ihsmarkov.categorical_process[i]) && + (ihsmarkov.categorical_process[i]->nb_value != marginal_distribution[i]->nb_value)) || + ((ihsmarkov.discrete_parametric_process[i]) && + (ihsmarkov.discrete_parametric_process[i]->nb_value < marginal_distribution[i]->nb_value))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + + else if ((ihsmarkov.categorical_process[i]) && (!characteristics[i])) { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + /*else if ((ihsmarkov.continuous_parametric_process[i]) && + (ihsmarkov.continuous_parametric_process[i]->ident == LINEAR_MODEL) && + 
(ihsmarkov.nb_component < ihsmarkov.nb_state)) { + status = false; + error.update(SEQ_error[SEQR_MODEL_STRUCTURE]); + }*/ + } + } + + // EM structure: test validity of arguments + + if ((nb_iter != I_DEFAULT) && (nb_iter < 1)) { + status = false; + error.update(STAT_error[STATR_NB_ITERATION]); + } + + if (status) { + if (max_length > COUNTING_MAX_LENGTH) { + counting_flag = false; + } + + // construction of the hidden semi-Markov chain + + hsmarkov = new HiddenSemiMarkov(ihsmarkov , false , (int)(max_length * SAMPLE_NB_VALUE_COEFF)); + + if (hsmarkov->type == EQUILIBRIUM) { + for (i = 0;i < hsmarkov->nb_state;i++) { + hsmarkov->initial[i] = 1. / (double)hsmarkov->nb_state; + } + } + + if (common_dispersion) { + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->continuous_parametric_process[i]) { + hsmarkov->continuous_parametric_process[i]->tied_dispersion = true; + } + } + } + +# ifdef DEBUG + cout << *hsmarkov; +# endif + + // EM structure: construction of the data structures of the algorithm + + observation = new double*[max_length]; + for (i = 0;i < max_length;i++) { + observation[i] = new double[hsmarkov->nb_state]; + } + + norm = new double[max_length]; + state_norm = new double[hsmarkov->nb_state]; + + forward1 = new double*[max_length]; + for (i = 0;i < max_length;i++) { + forward1[i] = new double[hsmarkov->nb_state]; + } + + state_in = new double*[max_length - 1]; + for (i = 0;i < max_length - 1;i++) { + state_in[i] = new double[hsmarkov->nb_state]; + } + + backward = new double[hsmarkov->nb_state]; + + backward1 = new double*[max_length]; + for (i = 0;i < max_length;i++) { + backward1[i] = new double[hsmarkov->nb_state]; + } + + auxiliary = new double[hsmarkov->nb_state]; + + chain_reestim = new ChainReestimation(hsmarkov->type , hsmarkov->nb_state , hsmarkov->nb_state); + + occupancy_nb_value = new int[hsmarkov->nb_state]; + occupancy_reestim = new Reestimation*[hsmarkov->nb_state]; + if (hsmarkov->type == EQUILIBRIUM) { + 
length_bias_reestim = new Reestimation*[hsmarkov->nb_state]; + } + + for (i = 0;i < hsmarkov->nb_state;i++) { + switch (hsmarkov->sojourn_type[i]) { + + case SEMI_MARKOVIAN : { + if (estimator == COMPLETE_LIKELIHOOD) { + occupancy_nb_value[i] = hsmarkov->state_process->sojourn_time[i]->alloc_nb_value; + } + else { + occupancy_nb_value[i] = MIN(hsmarkov->state_process->sojourn_time[i]->alloc_nb_value , + max_length); + } + + occupancy_reestim[i] = new Reestimation(occupancy_nb_value[i]); + if (hsmarkov->type == EQUILIBRIUM) { + length_bias_reestim[i] = new Reestimation(occupancy_nb_value[i]); + } + break; + } + + case MARKOVIAN : { + occupancy_reestim[i] = NULL; + if (hsmarkov->type == EQUILIBRIUM) { + length_bias_reestim[i] = NULL; + } + break; + } + } + } + + max_nb_value = 0; + for (i = 0;i < hsmarkov->nb_state;i++) { + if ((hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) && (occupancy_nb_value[i] > max_nb_value)) { + max_nb_value = occupancy_nb_value[i]; + } + } + + if (estimator == KAPLAN_MEIER) { + censored_occupancy_nb_value = new int[hsmarkov->nb_state]; + censored_occupancy_reestim = new Reestimation*[hsmarkov->nb_state]; + for (i = 0;i < hsmarkov->nb_state;i++) { + switch (hsmarkov->sojourn_type[i]) { + case SEMI_MARKOVIAN : + censored_occupancy_nb_value[i] = MIN(hsmarkov->state_process->sojourn_time[i]->alloc_nb_value , + max_length + 1); + censored_occupancy_reestim[i] = new Reestimation(censored_occupancy_nb_value[i]); + break; + case MARKOVIAN : + censored_occupancy_reestim[i] = NULL; + break; + } + } + + occupancy_survivor = new double[max_nb_value]; + censored_occupancy_survivor = new double[max_nb_value + 1]; + } + + hoccupancy = new FrequencyDistribution(max_nb_value); + + if ((os) && (hsmarkov->type == ORDINARY)) { + complete_occupancy_weight = new double[hsmarkov->nb_state]; + censored_occupancy_weight = new double[hsmarkov->nb_state]; + } + + observation_reestim = new Reestimation**[hsmarkov->nb_output_process]; + for (i = 0;i < 
hsmarkov->nb_output_process;i++) { + if ((marginal_distribution[i]) && ((!(hsmarkov->continuous_parametric_process[i])) || + ((hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)))) { + observation_reestim[i] = new Reestimation*[hsmarkov->nb_state]; + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j] = new Reestimation(marginal_distribution[i]->nb_value); + } + } + + else { + observation_reestim[i] = NULL; + } + } + + max_nb_value = 0; + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->discrete_parametric_process[i]) && + (max_nb_value < marginal_distribution[i]->nb_value)) { + max_nb_value = marginal_distribution[i]->nb_value; + } + } + + if (max_nb_value > 0) { + hobservation = new FrequencyDistribution(max_nb_value); + } + else { + hobservation = NULL; + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident == VON_MISES)) { + break; + } + } + + if (i < hsmarkov->nb_output_process) { + mean_direction = new double*[hsmarkov->nb_state]; + for (i = 0;i < hsmarkov->nb_state;i++) { + mean_direction[i] = new double[4]; + } + } + else { + mean_direction = NULL; + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((!marginal_distribution[i]) || ((hsmarkov->continuous_parametric_process[i]) && + ((hsmarkov->continuous_parametric_process[i]->ident == LINEAR_MODEL) || + (hsmarkov->continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)))) { + break; + } + } + + if (i < hsmarkov->nb_output_process) { + state_sequence_count = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + state_sequence_count[i] = new double*[length[i]]; + for (j = 0;j < length[i];j++) { + state_sequence_count[i][j] = new double[hsmarkov->nb_state]; + } + } + } + else { + state_sequence_count = NULL; + } + + pioutput = new 
int*[nb_variable]; + proutput = new double*[nb_variable]; + + iter = 0; + nb_likelihood_decrease = 0; + + // EM structure: iterate + do { + iter++; + previous_likelihood = likelihood; + likelihood = 0.; + + // EM structure: initialization of the reestimation quantities + + chain_reestim->init(); + + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + for (j = 0;j < occupancy_nb_value[i];j++) { + occupancy_reestim[i]->frequency[j] = 0.; + } + + if (hsmarkov->type == EQUILIBRIUM) { + for (j = 0;j < occupancy_nb_value[i];j++) { + length_bias_reestim[i]->frequency[j] = 0.; + } + } + + if (estimator == KAPLAN_MEIER) { + for (j = 0;j < censored_occupancy_nb_value[i];j++) { + censored_occupancy_reestim[i]->frequency[j] = 0.; + } + } + + if ((os) && (hsmarkov->type == ORDINARY)) { + complete_occupancy_weight[i] = 0.; + censored_occupancy_weight[i] = 0.; + } + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (observation_reestim[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + observation_reestim[i][j]->frequency[k] = 0.; + } + } + } + } + + if (state_sequence_count) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < hsmarkov->nb_state;k++) { + state_sequence_count[i][j][k] = 0.; + } + } + } + } + +# ifdef DEBUG + for (i = 0;i < hsmarkov->nb_state;i++) { + for (j = 0;j < 4;j++) { + test[i][j] = 0.; + } + } +# endif + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + switch (type[j]) { + case INT_VALUE : + pioutput[j] = int_sequence[i][j]; + break; + case REAL_VALUE : + proutput[j] = real_sequence[i][j]; + break; + } + } + + // EM structure: forward recurrence + + for (j = 0;j < length[i];j++) { + norm[j] = 0.; + + for (k = 0;k < hsmarkov->nb_state;k++) { + + // computation of the observation probabilities + + observation[j][k] = 1.; + for (m = 0;m < hsmarkov->nb_output_process;m++) { + if 
(hsmarkov->categorical_process[m]) { + observation[j][k] *= hsmarkov->categorical_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else if (hsmarkov->discrete_parametric_process[m]) { + observation[j][k] *= hsmarkov->discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else { + if (((hsmarkov->continuous_parametric_process[m]->ident == GAMMA) || + (hsmarkov->continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (min_value[m] < min_interval[m] / 2)) { + switch (type[m]) { + case INT_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + min_interval[m]); + break; + case REAL_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + min_interval[m]); + break; + } + } + + else if (hsmarkov->continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (type[m]) { + case INT_VALUE : + residual = *pioutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->intercept + + hsmarkov->continuous_parametric_process[m]->observation[k]->slope * + (index_param_type == IMPLICIT_TYPE ? j : index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->intercept + + hsmarkov->continuous_parametric_process[m]->observation[k]->slope * + (index_param_type == IMPLICIT_TYPE ? 
j : index_parameter[i][j])); + break; + } + + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + +# ifdef DEBUG + cout << STAT_label[STATL_STATE] << " " << k << " " << SEQ_label[SEQL_SEQUENCE] << " " << i << " " + << SEQ_label[SEQL_INDEX] << " " << j << ": " << residual << " " + << hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual) << endl; +# endif + + } + + else if (hsmarkov->continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (j == 0) { + switch (type[m]) { + case INT_VALUE : + residual = *pioutput[m] - hsmarkov->continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - hsmarkov->continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (type[m]) { + case INT_VALUE : + residual = *pioutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->location + + hsmarkov->continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - hsmarkov->continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->location + + hsmarkov->continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - hsmarkov->continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else { + switch (type[m]) { + case INT_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - min_interval[m] / 2 , *pioutput[m] + min_interval[m] / 2); + break; + case REAL_VALUE : + observation[j][k] *= 
hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - min_interval[m] / 2 , *proutput[m] + min_interval[m] / 2); + break; + } + } + } + } + + switch (hsmarkov->sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if (j == 0) { + state_norm[k] = hsmarkov->initial[k]; + } + else { + state_norm[k] += state_in[j - 1][k] - forward1[j - 1][k]; + } + state_norm[k] *= observation[j][k]; + + norm[j] += state_norm[k]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (j == 0) { + forward1[j][k] = hsmarkov->initial[k]; + } + else { + forward1[j][k] = state_in[j - 1][k]; + } + forward1[j][k] *= observation[j][k]; + + norm[j] += forward1[j][k]; + break; + } + } + } + + if (norm[j] > 0.) { + for (k = 0;k < hsmarkov->nb_state;k++) { + switch (hsmarkov->sojourn_type[k]) { + case SEMI_MARKOVIAN : + state_norm[k] /= norm[j]; + break; + case MARKOVIAN : + forward1[j][k] /= norm[j]; + break; + } + } + + likelihood += log(norm[j]); + } + + else { + likelihood = D_INF; + break; + } + + for (k = 0;k < hsmarkov->nb_state;k++) { + + // case semi-Markovian state + + if (hsmarkov->sojourn_type[k] == SEMI_MARKOVIAN) { + occupancy = hsmarkov->state_process->sojourn_time[k]; + obs_product = 1.; + forward1[j][k] = 0.; + + if (j < length[i] - 1) { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) 
{ + break; + } + + if (m < j + 1) { + forward1[j][k] += obs_product * occupancy->mass[m] * state_in[j - m][k]; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + forward1[j][k] += obs_product * occupancy->mass[m] * hsmarkov->initial[k]; + break; + case EQUILIBRIUM : + forward1[j][k] += obs_product * hsmarkov->forward[k]->mass[m] * hsmarkov->initial[k]; + break; + } + } + } + } + + else { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < j + 1) { + forward1[j][k] += obs_product * (1. - occupancy->cumul[m - 1]) * state_in[j - m][k]; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + forward1[j][k] += obs_product * (1. - occupancy->cumul[m - 1]) * + hsmarkov->initial[k]; + break; + case EQUILIBRIUM : + forward1[j][k] += obs_product * (1. - hsmarkov->forward[k]->cumul[m - 1]) * + hsmarkov->initial[k]; + break; + } + } + } + } + } + } + + if (j < length[i] - 1) { + for (k = 0;k < hsmarkov->nb_state;k++) { + state_in[j][k] = 0.; + for (m = 0;m < hsmarkov->nb_state;m++) { + state_in[j][k] += hsmarkov->transition[m][k] * forward1[j][m]; + } + } + } + + for (k = 0;k < nb_variable;k++) { + switch (type[k]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + } + + if (likelihood == D_INF) { + break; + } + if (likelihood > previous_likelihood) { + // save result + if (hsmarkov_best != NULL) + delete hsmarkov_best; + hsmarkov_best = new HiddenSemiMarkov(*hsmarkov); + } + +# ifdef DEBUG + for (j = 0;j < length[i];j++) { + cout << j << " : "; + for (k = 0;k < hsmarkov->nb_state;k++) { + cout << forward1[j][k] << " "; +// cout << observation[j][k] << " "; + } + cout << endl; + } + cout << endl; +# endif + + // EM structure: backward recurrence + + for (j = 0;j < nb_variable;j++) { + if (type[j] == INT_VALUE) { + pioutput[j]--; + } + } + + j = length[i] - 1; + for (k = 0;k < 
hsmarkov->nb_state;k++) { + backward[k] = forward1[j][k]; + backward1[j][k] = backward[k]; + + // accumulation of the reestimation quantities of the observation distributions + + for (m = 0;m < hsmarkov->nb_output_process;m++) { + if (observation_reestim[m]) { + observation_reestim[m][k]->frequency[*pioutput[m]] += backward[k]; + } + } + + if (state_sequence_count) { + state_sequence_count[i][j][k] += backward[k]; + } + } + + for (j = length[i] - 2;j >= 0;j--) { + for (k = 0;k < nb_variable;k++) { + if (type[k] == INT_VALUE) { + pioutput[k]--; + } + } + + for (k = 0;k < hsmarkov->nb_state;k++) { + auxiliary[k] = 0.; + + switch (hsmarkov->sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = hsmarkov->state_process->sojourn_time[k]; + obs_product = 1.; + + for (m = 1;m < MIN(length[i] - j , occupancy->nb_value);m++) { + obs_product *= observation[j + m][k] / norm[j + m]; + if (obs_product == 0.) { + break; + } + + if (backward1[j + m][k] > 0.) { +// if (forward1[j + m][k] > 0.) { + if (m < length[i] - j - 1) { + buff = backward1[j + m][k] * obs_product * occupancy->mass[m] / + forward1[j + m][k]; + + // EM structure: accumulation of the reestimation quantities of the state occupancy distributions + + occupancy_reestim[k]->frequency[m] += buff * state_in[j][k]; + } + + else { + buff = obs_product * (1. - occupancy->cumul[m - 1]); + + // EM structure: accumulation of the reestimation quantities of the state occupancy distributions + + switch (estimator) { + + case COMPLETE_LIKELIHOOD : { + for (n = m;n < occupancy->nb_value;n++) { + occupancy_reestim[k]->frequency[n] += obs_product * occupancy->mass[n] * + state_in[j][k]; + } + break; + } + + case KAPLAN_MEIER : { + censored_occupancy_reestim[k]->frequency[m] += buff * state_in[j][k]; + break; + } + } + } + + auxiliary[k] += buff; + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (backward1[j + 1][k] > 0.) { +// if (forward1[j + 1][k] > 0.) 
{ + auxiliary[k] = backward1[j + 1][k] / state_in[j][k]; + +/* auxiliary[k] = backward1[j + 1][k] * observation[j + 1][k] / + (forward1[j + 1][k] * norm[j + 1]); */ + + } + break; + } + } + } + + for (k = 0;k < hsmarkov->nb_state;k++) { + backward1[j][k] = 0.; + + for (m = 0;m < hsmarkov->nb_state;m++) { + buff = auxiliary[m] * hsmarkov->transition[k][m] * forward1[j][k]; + backward1[j][k] += buff; + + // EM structure: accumulation of the reestimation quantities of the transition probabilities + + chain_reestim->transition[k][m] += buff; + } + + switch (hsmarkov->sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + backward[k] = backward[k] + backward1[j][k] - auxiliary[k] * state_in[j][k]; + if (backward[k] < 0.) { + backward[k] = 0.; + } + if (backward[k] > 1.) { + backward[k] = 1.; + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward[k] = backward1[j][k]; + break; + } + } + + // EM structure: accumulation of the reestimation quantities of the observation distributions + + for (m = 0;m < hsmarkov->nb_output_process;m++) { + if (observation_reestim[m]) { + observation_reestim[m][k]->frequency[*pioutput[m]] += backward[k]; + } + } + } + + if (state_sequence_count) { + for (k = 0;k < hsmarkov->nb_state;k++) { + state_sequence_count[i][j][k] += backward[k]; + } + } + } + + // accumulation of the reestimation quantities of the initial probabilities + + if (hsmarkov->type == ORDINARY) { + for (j = 0;j < hsmarkov->nb_state;j++) { + chain_reestim->initial[j] += backward[j]; + } + } + + // EM structure: accumulation of the reestimation quantities of the initial state occupancy distributions + + if ((hsmarkov->type == ORDINARY) || (estimator == COMPLETE_LIKELIHOOD)) { + for (j = 0;j < hsmarkov->nb_state;j++) { + if ((hsmarkov->sojourn_type[j] == SEMI_MARKOVIAN) && (hsmarkov->initial[j] > 0.)) { + occupancy = hsmarkov->state_process->sojourn_time[j]; + obs_product = 1.; + if (hsmarkov->type == EQUILIBRIUM) { + sum = 0.; 
+ } + + for (k = 1;k < MIN(length[i] + 1 , occupancy->nb_value);k++) { + obs_product *= observation[k - 1][j] / norm[k - 1]; + if (obs_product == 0.) { + break; + } + + if (backward1[k - 1][j] > 0.) { +// if (forward1[k - 1][j] > 0.) { + if (k < length[i]) { + switch (hsmarkov->type) { + case ORDINARY : + occupancy_reestim[j]->frequency[k] += backward1[k - 1][j] * obs_product * + occupancy->mass[k] * hsmarkov->initial[j] / + forward1[k - 1][j]; + break; + case EQUILIBRIUM : + sum += backward1[k - 1][j] * obs_product / forward1[k - 1][j]; + length_bias_reestim[j]->frequency[k] += sum * occupancy->mass[k] * hsmarkov->initial[j] / + occupancy->mean; + break; + } + } + + else { + switch (estimator) { + + case COMPLETE_LIKELIHOOD : { + for (m = k;m < occupancy->nb_value;m++) { + switch (hsmarkov->type) { + case ORDINARY : + occupancy_reestim[j]->frequency[m] += obs_product * occupancy->mass[m] * + hsmarkov->initial[j]; + break; + case EQUILIBRIUM : + length_bias_reestim[j]->frequency[m] += (sum + obs_product * (m + 1 - k)) * occupancy->mass[m] * + hsmarkov->initial[j] / occupancy->mean; + break; + } + } + break; + } + + case KAPLAN_MEIER : { + censored_occupancy_reestim[j]->frequency[k] += obs_product * + (1. 
- occupancy->cumul[k - 1]) * + hsmarkov->initial[j]; + break; + } + } + } + } + } + } + } + } + +# ifdef DEBUG + for (j = length[i] - 1;j >= 0;j--) { + cout << j << " : "; + double sum = 0.; + for (k = 0;k < hsmarkov->nb_state;k++) { + sum += backward[k]; + cout << backward[k]; + if ((hsmarkov->sojourn_type[k] == SEMI_MARKOVIAN) && (j < length[i] - 1)){ + cout << " (" << backward1[j][k] << ") "; + } + } + cout << "| " << sum << endl; + + for (k = 0;k < hsmarkov->nb_state;k++) { + if (hsmarkov->sojourn_type[k] == SEMI_MARKOVIAN) { + if (j < length[i] - 1) { + test[k][0] += backward1[j][k]; + test[k][1] += auxiliary[k] * state_in[j][k]; + } + else { + test[k][2] += backward[j]; + } + if (j == 0) { + test[k][3] += backward[j]; + } + } + } + } +# endif + + if ((os) && (hsmarkov->type == ORDINARY)) { + for (j = 0;j < hsmarkov->nb_state;j++) { + if (hsmarkov->sojourn_type[j] == SEMI_MARKOVIAN) { + for (k = 0;k < length[i] - 1;k++) { + complete_occupancy_weight[j] += backward1[k][j]; + } + censored_occupancy_weight[j] += backward1[length[i] - 1][j]; + } + } + } + } + + if (likelihood != D_INF) { + if (likelihood < previous_likelihood) { + nb_likelihood_decrease++; + } + else { + nb_likelihood_decrease = 0; + } + + // EM structure: reestimation of the initial probabilities + + if (hsmarkov->type == ORDINARY) { + reestimation(hsmarkov->nb_state , chain_reestim->initial , + hsmarkov->initial , MIN_PROBABILITY , false); + } + + // EM structure: reestimation of the transition probabilities + + for (i = 0;i < hsmarkov->nb_state;i++) { + reestimation(hsmarkov->nb_state , chain_reestim->transition[i] , + hsmarkov->transition[i] , MIN_PROBABILITY , false); + } + + // EM structure: reestimation of the state occupancy distributions + + min_likelihood = 0.; + + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + occupancy = hsmarkov->state_process->sojourn_time[i]; + + if (estimator == KAPLAN_MEIER) { + 
occupancy_reestim[i]->nb_value_computation(); + occupancy_reestim[i]->offset_computation(); + occupancy_reestim[i]->nb_element_computation(); + + censored_occupancy_reestim[i]->nb_value_computation(); + censored_occupancy_reestim[i]->offset_computation(); + censored_occupancy_reestim[i]->nb_element_computation(); + + if (censored_occupancy_reestim[i]->nb_element > 0.) { + +# ifdef DEBUG + cout << "\n" << STAT_label[STATL_STATE] << " " << i << " (" << test[i][2] + << " | " << censored_occupancy_reestim[i]->nb_element << ") - "; + + occupancy_reestim[i]->max_computation(); + occupancy_reestim[i]->mean_computation(); + occupancy_reestim[i]->variance_computation(); + + occupancy_reestim[i]->ascii_characteristic_print(cout); +# endif + + occupancy_reestim[i]->state_occupancy_estimation(censored_occupancy_reestim[i] , + occupancy_reestim[i] , + occupancy_survivor , + censored_occupancy_survivor , false); + } + } + +# ifdef DEBUG + cout << STAT_label[STATL_STATE] << " " << i << " ("; +# endif + + if ((hsmarkov->type == ORDINARY) || (estimator == PARTIAL_LIKELIHOOD)) { + occupancy_reestim[i]->nb_value_computation(); + occupancy_reestim[i]->offset_computation(); + occupancy_reestim[i]->nb_element_computation(); + occupancy_reestim[i]->max_computation(); + occupancy_reestim[i]->mean_computation(); + occupancy_reestim[i]->variance_computation(); + +# ifdef DEBUG + if (hsmarkov->type == ORDINARY) { + switch (estimator) { + case COMPLETE_LIKELIHOOD : + cout << test[i][0] + test[i][2] << " | " << test[i][1] + test[i][3]; + break; + case PARTIAL_LIKELIHOOD : + cout << test[i][0]; + break; + } + cout << " | " << occupancy_reestim[i]->nb_element << ") - "; + occupancy_reestim[i]->ascii_characteristic_print(cout); + } +# endif + + } + + else { + offset = 1; + nb_value = occupancy_nb_value[i]; + + ofrequency = occupancy_reestim[i]->frequency + occupancy_nb_value[i]; + lfrequency = length_bias_reestim[i]->frequency + occupancy_nb_value[i]; + while ((*--ofrequency == 0) && 
(*--lfrequency == 0) && (nb_value > 2)) { + nb_value--; + } + occupancy_reestim[i]->nb_value = nb_value; + length_bias_reestim[i]->nb_value = nb_value; + + ofrequency = occupancy_reestim[i]->frequency + offset; + lfrequency = length_bias_reestim[i]->frequency + offset; + while ((*ofrequency++ == 0) && (*lfrequency++ == 0) && (offset < nb_value - 1)) { + offset++; + } + occupancy_reestim[i]->offset = offset; + length_bias_reestim[i]->offset = offset; + + occupancy_reestim[i]->nb_element_computation(); + length_bias_reestim[i]->nb_element_computation(); + +# ifdef DEBUG + occupancy_reestim[i]->max_computation(); + occupancy_reestim[i]->mean_computation(); + occupancy_reestim[i]->variance_computation(); + + cout << test[i][1] << " | " << occupancy_reestim[i]->nb_element << ") - "; + occupancy_reestim[i]->ascii_characteristic_print(cout); + + length_bias_reestim[i]->max_computation(); + length_bias_reestim[i]->mean_computation(); + length_bias_reestim[i]->variance_computation(); + + cout << STAT_label[STATL_STATE] << " " << i << " (" << test[i][3] << " | " + << length_bias_reestim[i]->nb_element << ") - "; + length_bias_reestim[i]->ascii_characteristic_print(cout); +# endif + + switch (mean_estimator) { + case COMPUTED : + occupancy_mean = interval_bisection(occupancy_reestim[i] , length_bias_reestim[i]); + break; + case ONE_STEP_LATE : + occupancy_mean = occupancy->mean; + break; + } + +# ifdef DEBUG + cout << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_MEAN] << ": " + << occupancy_mean << endl; +# endif + + occupancy_reestim[i]->equilibrium_process_combination(length_bias_reestim[i] , occupancy_mean); + +# ifdef DEBUG + cout << test[i][0] + test[i][2] << " | " << test[i][1] + test[i][3] << " | " + << occupancy_reestim[i]->nb_element << ") - "; + occupancy_reestim[i]->ascii_characteristic_print(cout); +# endif + } + + hoccupancy->update(occupancy_reestim[i] , + MAX((int)(occupancy_reestim[i]->nb_element * + MAX(sqrt(occupancy_reestim[i]->variance) , 1.) 
* OCCUPANCY_COEFF) , MIN_NB_ELEMENT)); + if (iter <= EXPLORATION_NB_ITER) { + occupancy_likelihood = hoccupancy->Reestimation::parametric_estimation(occupancy , 1 , true , + OCCUPANCY_THRESHOLD , geometric_poisson); + } + else { + occupancy_likelihood = hoccupancy->Reestimation::type_parametric_estimation(occupancy , 1 , true , + OCCUPANCY_THRESHOLD , geometric_poisson); + } + + if (occupancy_likelihood == D_INF) { + min_likelihood = D_INF; + } + else { + occupancy->computation(hoccupancy->nb_value , OCCUPANCY_THRESHOLD); + if (hsmarkov->type == EQUILIBRIUM) { + hsmarkov->forward[i]->copy(*occupancy); + hsmarkov->forward[i]->computation(*occupancy); + } + } + +# ifdef DEBUG + cout << STAT_word[STATW_STATE] << " " << i << " " << SEQ_word[SEQW_OCCUPANCY_DISTRIBUTION] << endl; + occupancy->ascii_print(cout); +# endif + + } + } + + if (hsmarkov->type == EQUILIBRIUM) { + hsmarkov->initial_probability_computation(); + } + + // EM structure: reestimation of the observation distributions + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency , + hsmarkov->categorical_process[i]->observation[j]->mass , + MIN_PROBABILITY , false); + } + } + + else if (observation_reestim[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->nb_value_computation(); + observation_reestim[i][j]->offset_computation(); + observation_reestim[i][j]->nb_element_computation(); + observation_reestim[i][j]->max_computation(); + if ((hsmarkov->discrete_parametric_process[i]) || + (hsmarkov->continuous_parametric_process[i]->ident != ZERO_INFLATED_GAMMA)) { + observation_reestim[i][j]->mean_computation(); + observation_reestim[i][j]->variance_computation(true); +// observation_reestim[i][j]->variance_computation(); + } + } + + if (hsmarkov->discrete_parametric_process[i]) { + for (j = 0;j < 
hsmarkov->nb_state;j++) { + hobservation->update(observation_reestim[i][j] , + MAX((int)(observation_reestim[i][j]->nb_element * + MAX(sqrt(observation_reestim[i][j]->variance) , 1.) * OBSERVATION_COEFF) , MIN_NB_ELEMENT)); + observation_likelihood = hobservation->Reestimation::type_parametric_estimation(hsmarkov->discrete_parametric_process[i]->observation[j] , + 0 , true , OBSERVATION_THRESHOLD); + + if (observation_likelihood == D_INF) { + min_likelihood = D_INF; + } + else { + hsmarkov->discrete_parametric_process[i]->observation[j]->computation(marginal_distribution[i]->nb_value , + OBSERVATION_THRESHOLD); + + if (hsmarkov->discrete_parametric_process[i]->observation[j]->ident == BINOMIAL) { + for (k = hsmarkov->discrete_parametric_process[i]->observation[j]->nb_value;k < marginal_distribution[i]->nb_value;k++) { + hsmarkov->discrete_parametric_process[i]->observation[j]->mass[k] = 0.; + } + } + } + } + } + + else { + switch (hsmarkov->continuous_parametric_process[i]->ident) { + + case GAMMA : { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->gamma_estimation(hsmarkov->continuous_parametric_process[i]->observation[j] , iter); + } + break; + } + + case ZERO_INFLATED_GAMMA : { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->zero_inflated_gamma_estimation(hsmarkov->continuous_parametric_process[i]->observation[j] , iter); + } + break; + } + + case GAUSSIAN : { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->location = observation_reestim[i][j]->mean; + } + + if (common_dispersion) { + variance = 0.; + buff = 0.; + + for (j = 0;j < hsmarkov->nb_state;j++) { + for (k = observation_reestim[i][j]->offset;k < observation_reestim[i][j]->nb_value;k++) { + diff = k - observation_reestim[i][j]->mean; + variance += observation_reestim[i][j]->frequency[k] * diff * diff; + } + + buff += observation_reestim[i][j]->nb_element; + } + + variance /= buff; +// variance /= 
(buff - 1); + + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(variance); + } + } + + else { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(observation_reestim[i][j]->variance); + if (hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion / + hsmarkov->continuous_parametric_process[i]->observation[j]->location < GAUSSIAN_MIN_VARIATION_COEFF) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = hsmarkov->continuous_parametric_process[i]->observation[j]->location * GAUSSIAN_MIN_VARIATION_COEFF; + } + } + } + + break; + } + + case VON_MISES : { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->mean_direction_computation(mean_direction[j]); + hsmarkov->continuous_parametric_process[i]->observation[j]->location = mean_direction[j][3]; + } + + if (common_dispersion) { + global_mean_direction = 0.; + buff = 0.; + + for (j = 0;j < hsmarkov->nb_state;j++) { + global_mean_direction += observation_reestim[i][j]->nb_element * mean_direction[j][2]; + buff += observation_reestim[i][j]->nb_element; + } + concentration = von_mises_concentration_computation(global_mean_direction / buff); + + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = concentration; + } + } + + else { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = von_mises_concentration_computation(mean_direction[j][2]); + } + } + break; + } + } + } + } + + else { + switch (hsmarkov->continuous_parametric_process[i]->ident) { + case GAMMA : + gamma_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i] , iter); + break; + case ZERO_INFLATED_GAMMA : + zero_inflated_gamma_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i] , 
iter); + break; + case GAUSSIAN : + gaussian_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + case VON_MISES : + von_mises_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + case LINEAR_MODEL : + if ((index_param_type != TIME) && (index_param_type != IMPLICIT_TYPE)) { + likelihood = D_INF; + stringstream error_message , correction_message; + error_message << SEQ_error[SEQR_INDEX_PARAMETER_TYPE] << ": shoud be "; + correction_message << SEQ_label[SEQL_TIME] << " or IMPLICIT" << endl; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + linear_model_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + case AUTOREGRESSIVE_MODEL : + autoregressive_model_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + } + } + } + } + + if (os) { + *os << STAT_label[STATL_ITERATION] << " " << iter << " " + << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << likelihood << endl; + } + +# ifdef DEBUG + if (iter % 5 == 0) { + cout << *hsmarkov; + } +# endif + + } + while ((likelihood != D_INF) && (((nb_iter == I_DEFAULT) && (iter < SEMI_MARKOV_NB_ITER) && + (((likelihood - previous_likelihood) / -likelihood > SEMI_MARKOV_LIKELIHOOD_DIFF) || + (min_likelihood == D_INF) || (nb_likelihood_decrease == 1))) || + ((nb_iter != I_DEFAULT) && (iter < nb_iter)))); + + // EM structure: manage return value + + if ((likelihood == D_INF) && (hsmarkov_best != NULL)) { + *os << "\n Convergence failed, returning saved model with highest likelihood" << endl; + delete hsmarkov; + hsmarkov = NULL; + hsmarkov = new HiddenSemiMarkov(*hsmarkov_best); + likelihood = hsmarkov->likelihood_computation(*this); + delete hsmarkov_best; + hsmarkov_best = NULL; + reload_prev_optimal = true; + } + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << iter << " " << 
STAT_label[STATL_ITERATIONS] << endl; + + if (hsmarkov->type == ORDINARY) { + *os << "\n" << SEQ_label[SEQL_OCCUPANCY_WEIGHTS] << endl; + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + *os << STAT_label[STATL_STATE] << " " << i << ": " << complete_occupancy_weight[i] << ", " + << censored_occupancy_weight[i]; + if ((complete_occupancy_weight[i] > 0.) && (censored_occupancy_weight[i] > 0.)) { + *os << " (" << complete_occupancy_weight[i] / (complete_occupancy_weight[i] + censored_occupancy_weight[i]) << ", " + << censored_occupancy_weight[i] / (complete_occupancy_weight[i] + censored_occupancy_weight[i]) << ")"; + } + *os << endl; + } + } + } + } + + // reestimation of the initial probabilities + if (!reload_prev_optimal) { + if (hsmarkov->type == ORDINARY) { + reestimation(hsmarkov->nb_state , chain_reestim->initial , + hsmarkov->initial , MIN_PROBABILITY , true); + } + + // reestimation of the transition probabilities + + for (i = 0;i < hsmarkov->nb_state;i++) { + reestimation(hsmarkov->nb_state , chain_reestim->transition[i] , + hsmarkov->transition[i] , MIN_PROBABILITY , true); + } + + if (hsmarkov->type == EQUILIBRIUM) { + hsmarkov->initial_probability_computation(); + } + } + + for (i = 0;i < hsmarkov->nb_state;i++) { + if ((hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) && + (hsmarkov->state_process->sojourn_time[i]->mean == 1.)) { + hsmarkov->sojourn_type[i] = MARKOVIAN; + delete hsmarkov->state_process->sojourn_time[i]; + hsmarkov->state_process->sojourn_time[i] = NULL; + delete hsmarkov->forward[i]; + hsmarkov->forward[i] = NULL; + } + } + + // reestimation of the categorical observation distributions + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency , + hsmarkov->categorical_process[i]->observation[j]->mass , + MIN_PROBABILITY , 
true); + } + } + + else if (hsmarkov->discrete_parametric_process[i]) { + hsmarkov->discrete_parametric_process[i]->nb_value_computation(); + } + } + } + + // destruction of the data structures of the algorithm + + for (i = 0;i < max_length;i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] norm; + delete [] state_norm; + + for (i = 0;i < max_length;i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < max_length - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] backward; + + for (i = 0;i < max_length;i++) { + delete [] backward1[i]; + } + delete [] backward1; + + delete [] auxiliary; + + delete chain_reestim; + + delete [] occupancy_nb_value; + + for (i = 0;i < hsmarkov->nb_state;i++) { + delete occupancy_reestim[i]; + } + delete [] occupancy_reestim; + + if (hsmarkov->type == EQUILIBRIUM) { + for (i = 0;i < hsmarkov->nb_state;i++) { + delete length_bias_reestim[i]; + } + delete [] length_bias_reestim; + } + + if (estimator == KAPLAN_MEIER) { + delete [] censored_occupancy_nb_value; + + for (i = 0;i < hsmarkov->nb_state;i++) { + delete censored_occupancy_reestim[i]; + } + delete [] censored_occupancy_reestim; + + delete [] occupancy_survivor; + delete [] censored_occupancy_survivor; + } + + delete hoccupancy; + + if ((os) && (hsmarkov->type == ORDINARY)) { + delete [] complete_occupancy_weight; + delete [] censored_occupancy_weight; + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (observation_reestim[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + delete observation_reestim[i][j]; + } + delete [] observation_reestim[i]; + } + } + delete [] observation_reestim; + + delete hobservation; + + if (mean_direction) { + for (i = 0;i < hsmarkov->nb_state;i++) { + delete [] mean_direction[i]; + } + delete [] mean_direction; + } + + if (state_sequence_count) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + delete [] state_sequence_count[i][j]; + } + delete 
[] state_sequence_count[i]; + } + delete [] state_sequence_count; + } + + delete [] pioutput; + delete [] proutput; + + if (likelihood == D_INF) { + delete hsmarkov; + hsmarkov = NULL; + error.update(STAT_error[STATR_ESTIMATION_FAILURE]); + } + + else { + if (state_sequence) { + hsmarkov->semi_markov_data = new SemiMarkovData(*this , ADD_STATE_VARIABLE , + (hsmarkov->type == EQUILIBRIUM ? true : false)); + seq = hsmarkov->semi_markov_data; + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (((hsmarkov->discrete_parametric_process[i]) || (hsmarkov->continuous_parametric_process[i])) && + (seq->characteristics[i + 1])) { + delete seq->characteristics[i + 1]; + seq->characteristics[i + 1] = NULL; + } + } + + hsmarkov->forward_backward(*seq); + + hsmarkov->create_cumul(); + hsmarkov->log_computation(); + hsmarkov->viterbi(*seq); + hsmarkov->remove_cumul(); + + seq->min_value[0] = 0; // seq->min_value_computation(0); + seq->max_value[0] = hsmarkov->nb_state-1; // seq->max_value_computation(0); + seq->build_marginal_frequency_distribution(0); + // variable 0 corresponds to hidden state. + // The states for which characteristics are computed are those which are present + seq->build_characteristic(0 , true , (hsmarkov->type == EQUILIBRIUM ? 
true : false)); + + seq->build_transition_count(hsmarkov); + seq->build_observation_frequency_distribution(hsmarkov->nb_state); + seq->build_observation_histogram(hsmarkov->nb_state); + + // computation of the state occupancy distributions + + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + + if (seq->characteristics[0] != NULL && seq->characteristics[0]->sojourn_time[i] != NULL) + hsmarkov->state_process->sojourn_time[i]->computation(seq->characteristics[0]->sojourn_time[i]->nb_value , + OCCUPANCY_THRESHOLD); + else + hsmarkov->state_process->sojourn_time[i]->computation(1 , OCCUPANCY_THRESHOLD); + if (hsmarkov->stype[i] == RECURRENT) { + if (hsmarkov->type == ORDINARY) { + hsmarkov->forward[i]->copy(*(hsmarkov->state_process->sojourn_time[i])); + } + hsmarkov->forward[i]->computation(*(hsmarkov->state_process->sojourn_time[i])); + } + } + } + + // computation of the mixtures of observation distributions (weights deduced from the restoration) + + weight = NULL; + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->categorical_process[i]) || (hsmarkov->discrete_parametric_process[i]) || + ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL))) { + weight = seq->weight_computation(); + break; + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + hsmarkov->categorical_process[i]->restoration_weight = new Distribution(*weight); + hsmarkov->categorical_process[i]->restoration_mixture = hsmarkov->categorical_process[i]->mixture_computation(hsmarkov->categorical_process[i]->restoration_weight); + } + + else if (hsmarkov->discrete_parametric_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->discrete_parametric_process[i]->observation[j]->cumul_computation(); +// 
hsmarkov->discrete_parametric_process[i]->observation[j]->computation(seq->observation_distribution[i + 1][j]->nb_value , +// OBSERVATION_THRESHOLD); + } + + hsmarkov->discrete_parametric_process[i]->restoration_weight = new Distribution(*weight); + hsmarkov->discrete_parametric_process[i]->restoration_mixture = hsmarkov->discrete_parametric_process[i]->mixture_computation(hsmarkov->discrete_parametric_process[i]->restoration_weight); + } + + else if ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) { + hsmarkov->continuous_parametric_process[i]->restoration_weight = new Distribution(*weight); + } + } + + delete weight; + + if ((os) && (seq->characteristics[0])) { + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << ": " << seq->restoration_likelihood; + + for (i = 0;i < nb_variable;i++) { + if (type[i] == REAL_VALUE) { + break; + } + } + if (i == nb_variable) { + *os << " | " << hsmarkov->SemiMarkov::likelihood_computation(*seq); + } + *os << endl; + } + } + + else { + if (hsmarkov->type == ORDINARY) { + for (i = 0;i < hsmarkov->nb_state;i++) { + if ((hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) && (hsmarkov->stype[i] == RECURRENT)) { + hsmarkov->forward[i]->copy(*(hsmarkov->state_process->sojourn_time[i])); + hsmarkov->forward[i]->computation(*(hsmarkov->state_process->sojourn_time[i])); + } + } + } + + hsmarkov->semi_markov_data = new SemiMarkovData(*this , SEQUENCE_COPY , + (hsmarkov->type == EQUILIBRIUM ? 
true : false)); + seq = hsmarkov->semi_markov_data; + if (seq->type[0] == STATE) { + seq->state_variable_init(INT_VALUE); + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (((hsmarkov->discrete_parametric_process[i]) || (hsmarkov->continuous_parametric_process[i])) && + (seq->characteristics[i])) { + delete seq->characteristics[i]; + seq->characteristics[i] = NULL; + } + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->categorical_process[i]->observation[j]->cumul_computation(); + + hsmarkov->categorical_process[i]->observation[j]->max_computation(); +// hsmarkov->categorical_process[i]->observation[j]->mean_computation(); +// hsmarkov->categorical_process[i]->observation[j]->variance_computation(); + } + } + } + + // computation of the log-likelihood and the characteristic distributions of the model + + seq->likelihood = hsmarkov->likelihood_computation(*this , seq->posterior_probability); + + hsmarkov->component_computation(); + hsmarkov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false); + + // computation of the mixtures of observation distributions (theoretical weights) + + weight = NULL; + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->categorical_process[i]) || (hsmarkov->discrete_parametric_process[i]) || + ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL))) { + switch (hsmarkov->type) { + case ORDINARY : + weight = hsmarkov->state_process->weight_computation(); + break; + case EQUILIBRIUM : + weight = new Distribution(hsmarkov->nb_state , hsmarkov->initial); + break; + } + break; + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + hsmarkov->categorical_process[i]->weight = new 
Distribution(*weight); + hsmarkov->categorical_process[i]->mixture = hsmarkov->categorical_process[i]->mixture_computation(hsmarkov->categorical_process[i]->weight); + } + + else if (hsmarkov->discrete_parametric_process[i]) { + hsmarkov->discrete_parametric_process[i]->weight = new Distribution(*weight); + hsmarkov->discrete_parametric_process[i]->mixture = hsmarkov->discrete_parametric_process[i]->mixture_computation(hsmarkov->discrete_parametric_process[i]->weight); + } + + else if ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) { + hsmarkov->continuous_parametric_process[i]->weight = new Distribution(*weight); + } + } + + delete weight; + + if ((os) && (state_sequence) && (seq->nb_sequence <= POSTERIOR_PROBABILITY_NB_SEQUENCE)) { + int *pstate; + + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] << endl; + for (i = 0;i < seq->nb_sequence;i++) { + *os << SEQ_label[SEQL_SEQUENCE] << " " << seq->identifier[i] << ": " + << seq->posterior_probability[i]; + + if (hsmarkov->nb_component == hsmarkov->nb_state) { + *os << " | " << SEQ_label[SEQL_STATE_BEGIN] << ": "; + + pstate = seq->int_sequence[i][0] + 1; + if (seq->index_parameter) { + for (j = 1;j < seq->length[i];j++) { + if (*pstate != *(pstate - 1)) { + *os << seq->index_parameter[i][j] << ", "; + } + pstate++; + } + } + + else { + for (j = 1;j < seq->length[i];j++) { + if (*pstate != *(pstate - 1)) { + *os << j << ", "; + } + pstate++; + } + } + } + + *os << endl; + } + } + } + } + + return hsmarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a hidden semi-Markov chain using the EM algorithm. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] nb_state number of states, + * \param[in] left_right flag on the Markov chain structure, + * \param[in] occupancy_mean mean state occupancy, + * \param[in] geometric_poisson flag on the estimation of Poisson geometric state occupancy distributions, + * \param[in] common_dispersion flag common dispersion parameter (continuous observation processes), + * \param[in] estimator estimator type for the reestimation of the state occupancy distributions + * (complete or partial likelihood), + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] state_sequence flag on the computation of the restored state sequences, + * \param[in] nb_iter number of iterations, + * \param[in] mean_estimator method for the computation of the state occupancy + * distribution mean (equilibrium semi-Markov chain). + * + * \return HiddenSemiMarkov object. 
+ */ +/*--------------------------------------------------------------*/ +/* Convenience overload: checks the arguments, builds a default initial hidden semi-Markov chain and delegates to the overload taking an initial model. */ +HiddenSemiMarkov* MarkovianSequences::hidden_semi_markov_estimation(StatError &error , ostream *os , + process_type itype , int nb_state , + bool left_right , double occupancy_mean , + bool geometric_poisson , bool common_dispersion , + censoring_estimator estimator , bool counting_flag , + bool state_sequence , int nb_iter , + duration_distribution_mean_estimator mean_estimator) const + +{ + bool status = true; /* cleared by any failed argument check */ + int i; + int nb_value[SEQUENCE_NB_VARIABLE]; /* NOTE(review): filled for i < nb_variable; assumes nb_variable <= SEQUENCE_NB_VARIABLE -- confirm */ + double proba , mean , variance; + HiddenSemiMarkov *ihsmarkov , *hsmarkov; + + /* the result stays NULL unless the estimation call at the end is reached */ + hsmarkov = NULL; + error.init(); + /* the number of states must lie in [2 , NB_STATE] */ + if ((nb_state < 2) || (nb_state > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + if ((occupancy_mean != D_DEFAULT) && (occupancy_mean <= 1.)) { /* an explicitly given mean state occupancy must exceed 1 */ + status = false; + error.update(SEQ_error[SEQR_OCCUPANCY]); + } + /* every variable must be of type INT_VALUE, REAL_VALUE or STATE */ + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + /* all checks passed: build the initial model, run the estimation, discard the initial model */ + if (status) { + for (i = 0;i < nb_variable;i++) { + if (marginal_distribution[i]) { + nb_value[i] = marginal_distribution[i]->nb_value; + } + else { + nb_value[i] = I_DEFAULT; /* no marginal distribution for this variable */ + } + } + + ihsmarkov = new HiddenSemiMarkov(itype , nb_state , nb_variable , nb_value); + + // initialization of the Markov chain parameters + + ihsmarkov->init(left_right , 0.); + + // initialization of the state occupancy distributions + + if (occupancy_mean == D_DEFAULT) { /* default: the larger of length_distribution->mean and OCCUPANCY_MEAN */ + occupancy_mean = MAX(length_distribution->mean , OCCUPANCY_MEAN); + } +# ifdef DEBUG + assert(ihsmarkov->sojourn_type == NULL); /* debug check that sojourn_type is not yet allocated before the allocation that follows */ +# endif +
ihsmarkov->sojourn_type = new state_sojourn_type[nb_state]; + ihsmarkov->state_process->absorption = new double[nb_state]; + ihsmarkov->state_process->sojourn_time = new DiscreteParametric*[nb_state]; + ihsmarkov->forward = new Forward*[nb_state]; + /* non-absorbing states become SEMI_MARKOVIAN with a NEGATIVE_BINOMIAL sojourn time; absorbing states become MARKOVIAN with absorption set to 1 */ + for (i = 0;i < nb_state;i++) { + if (ihsmarkov->stype[i] != ABSORBING) { + ihsmarkov->sojourn_type[i] = SEMI_MARKOVIAN; + ihsmarkov->state_process->absorption[i] = 0.; + proba = 1. / occupancy_mean; /* passed to the DiscreteParametric constructor below; occupancy_mean is > 1 or the default at this point */ + ihsmarkov->state_process->sojourn_time[i] = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 , + I_DEFAULT , 1. , proba , + OCCUPANCY_THRESHOLD); + /* a Forward object is built from the sojourn time only for RECURRENT states */ + if (ihsmarkov->stype[i] == RECURRENT) { + ihsmarkov->forward[i] = new Forward(*(ihsmarkov->state_process->sojourn_time[i]) , + ihsmarkov->state_process->sojourn_time[i]->alloc_nb_value); + } + else { + ihsmarkov->forward[i] = NULL; + } + } + + else { + ihsmarkov->sojourn_type[i] = MARKOVIAN; + ihsmarkov->state_process->absorption[i] = 1.; + ihsmarkov->state_process->sojourn_time[i] = NULL; + ihsmarkov->forward[i] = NULL; + } + } + + // initialization of the observation distributions + + for (i = 0;i < ihsmarkov->nb_output_process;i++) { + if (ihsmarkov->categorical_process[i]) { + ihsmarkov->categorical_process[i]->init(); + } + + else if (ihsmarkov->discrete_parametric_process[i]) { + ihsmarkov->discrete_parametric_process[i]->init(); + } + + else { /* neither categorical nor discrete parametric; assumes continuous_parametric_process[i] is non-NULL here -- TODO confirm */ + mean = mean_computation(i); + variance = variance_computation(i , mean); + /* GAUSSIAN initialization from the mean and variance computed just above for variable i */ + ihsmarkov->continuous_parametric_process[i]->init(GAUSSIAN , min_value[i] , max_value[i] , + mean , variance); + } + } + /* delegate to the overload taking an initial model, then free that initial model */ + hsmarkov = hidden_semi_markov_estimation(error , os , *ihsmarkov , geometric_poisson , + common_dispersion , estimator , counting_flag , + state_sequence , nb_iter , mean_estimator); + delete ihsmarkov; + } + + return hsmarkov; /* still NULL if an argument check failed */ +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a hidden semi-Markov chain using the MCEM algorithm.
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] ihsmarkov initial hidden semi-Markov chain, + * \param[in] geometric_poisson flag on the estimation of Poisson geometric state occupancy distributions, + * \param[in] common_dispersion flag common dispersion parameter (continuous observation processes), + * \param[in] min_nb_state_sequence minimum number of generated sequences, + * \param[in] max_nb_state_sequence maximum number of generated sequences, + * \param[in] parameter parameter for defining the number of generated sequences, + * \param[in] estimator estimator type for the reestimation of the state occupancy distributions + * (complete or partial likelihood), + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] state_sequence flag on the computation of the restored state sequences, + * \param[in] nb_iter number of iterations. + * + * \return HiddenSemiMarkov object.
+ */ +/*--------------------------------------------------------------*/ + +HiddenSemiMarkov* MarkovianSequences::hidden_semi_markov_stochastic_estimation(StatError &error , ostream *os , + const HiddenSemiMarkov &ihsmarkov , + bool geometric_poisson , bool common_dispersion , + int min_nb_state_sequence , + int max_nb_state_sequence , double parameter , + censoring_estimator estimator , + bool counting_flag , bool state_sequence , + int nb_iter) const + +{ + bool status; + int i , j , k , m , n; + int max_nb_value , iter , nb_state_sequence , state_occupancy , nb_likelihood_decrease , + *occupancy_nb_value , *state_seq , *pstate , ***state_sequence_count , nb_element , **pioutput; + double likelihood = D_INF , previous_likelihood , occupancy_likelihood , observation_likelihood , + min_likelihood , obs_product , residual , **observation , *norm , *state_norm , **forward1 , + **state_in , *backward , *cumul_backward , *occupancy_survivor , *censored_occupancy_survivor , + diff , variance , **mean_direction , concentration , global_mean_direction , **proutput; + Distribution *weight; + DiscreteParametric *occupancy; + ChainReestimation *chain_reestim; + Reestimation *bcomplete_run , *censored_run , **complete_run , **final_run , **initial_run , + **single_run , ***observation_reestim; + HiddenSemiMarkov *hsmarkov; + SemiMarkovData *seq; + const Reestimation *prun[3]; + +# ifdef DEBUG + double sum; +# endif + + + hsmarkov = NULL; + error.init(); + + // test number of values for each variable + + status = false; + for (i = 0;i < nb_variable;i++) { + if (max_value[i] > min_value[i]) { + status = true; + break; + } + } + + if (!status) { + error.update(STAT_error[STATR_VARIABLE_NB_VALUE]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << 
STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (ihsmarkov.nb_output_process != nb_variable) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + else { + for (i = 0;i < nb_variable;i++) { + if ((ihsmarkov.categorical_process[i]) || (ihsmarkov.discrete_parametric_process[i])) { + if (type[i] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (((ihsmarkov.categorical_process[i]) && + (ihsmarkov.categorical_process[i]->nb_value != marginal_distribution[i]->nb_value)) || + ((ihsmarkov.discrete_parametric_process[i]) && + (ihsmarkov.discrete_parametric_process[i]->nb_value < marginal_distribution[i]->nb_value))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + + else if ((ihsmarkov.categorical_process[i]) && (!characteristics[i])) { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = 
false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + else if ((ihsmarkov.continuous_parametric_process[i]) && + (ihsmarkov.continuous_parametric_process[i]->ident == LINEAR_MODEL) && + (ihsmarkov.nb_component < ihsmarkov.nb_state)) { + status = false; + error.update(SEQ_error[SEQR_MODEL_STRUCTURE]); + } + } + } + + if ((min_nb_state_sequence < 1) || (min_nb_state_sequence > max_nb_state_sequence)) { + status = false; + error.update(SEQ_error[SEQR_MIN_NB_STATE_SEQUENCE]); + } + + if ((nb_iter != I_DEFAULT) && (nb_iter < 1)) { + status = false; + error.update(STAT_error[STATR_NB_ITERATION]); + } + + if (status) { + if (max_length > COUNTING_MAX_LENGTH) { + counting_flag = false; + } + + // construction of the hidden semi-Markov chain + + hsmarkov = new HiddenSemiMarkov(ihsmarkov , false , (int)(max_length * SAMPLE_NB_VALUE_COEFF)); + + if (hsmarkov->type == EQUILIBRIUM) { + for (i = 0;i < hsmarkov->nb_state;i++) { + hsmarkov->initial[i] = 1. 
/ (double)hsmarkov->nb_state; + } + } + + if (common_dispersion) { + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->continuous_parametric_process[i]) { + hsmarkov->continuous_parametric_process[i]->tied_dispersion = true; + } + } + } + +# ifdef DEBUG + cout << *hsmarkov; +# endif + + // construction of the data structures of the algorithm + + observation = new double*[max_length]; + for (i = 0;i < max_length;i++) { + observation[i] = new double[hsmarkov->nb_state]; + } + + norm = new double[max_length]; + state_norm = new double[hsmarkov->nb_state]; + + forward1 = new double*[max_length]; + for (i = 0;i < max_length;i++) { + forward1[i] = new double[hsmarkov->nb_state]; + } + + state_in = new double*[max_length - 1]; + for (i = 0;i < max_length - 1;i++) { + state_in[i] = new double[hsmarkov->nb_state]; + } + + backward = new double[max_length + 1]; + cumul_backward = new double[max_length + 1]; + + state_seq = new int[max_length]; + + chain_reestim = new ChainReestimation(hsmarkov->type , hsmarkov->nb_state , hsmarkov->nb_state); + + occupancy_nb_value = new int[hsmarkov->nb_state]; + complete_run = new Reestimation*[hsmarkov->nb_state]; + final_run = new Reestimation*[hsmarkov->nb_state]; + if (hsmarkov->type == EQUILIBRIUM) { + initial_run = new Reestimation*[hsmarkov->nb_state]; + single_run = new Reestimation*[hsmarkov->nb_state]; + } + + for (i = 0;i < hsmarkov->nb_state;i++) { + switch (hsmarkov->sojourn_type[i]) { + + case SEMI_MARKOVIAN : { + occupancy_nb_value[i] = MIN(hsmarkov->state_process->sojourn_time[i]->alloc_nb_value , + max_length + 1); + + complete_run[i] = new Reestimation(occupancy_nb_value[i]); + final_run[i] = new Reestimation(occupancy_nb_value[i]); + if (hsmarkov->type == EQUILIBRIUM) { + initial_run[i] = new Reestimation(occupancy_nb_value[i]); + single_run[i] = new Reestimation(occupancy_nb_value[i]); + } + break; + } + + case MARKOVIAN : { + complete_run[i] = NULL; + final_run[i] = NULL; + if (hsmarkov->type == 
EQUILIBRIUM) { + initial_run[i] = NULL; + single_run[i] = NULL; + } + break; + } + } + } + + max_nb_value = 0; + for (i = 0;i < hsmarkov->nb_state;i++) { + if ((hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) && (occupancy_nb_value[i] > max_nb_value)) { + max_nb_value = occupancy_nb_value[i]; + } + } + + if (estimator != PARTIAL_LIKELIHOOD) { + occupancy_survivor = new double[max_nb_value]; + censored_occupancy_survivor = new double[max_nb_value + 1]; + } + + observation_reestim = new Reestimation**[hsmarkov->nb_output_process]; + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((marginal_distribution[i]) && ((!(hsmarkov->continuous_parametric_process[i])) || + ((hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)))) { + observation_reestim[i] = new Reestimation*[hsmarkov->nb_state]; + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j] = new Reestimation(marginal_distribution[i]->nb_value); + } + } + + else { + observation_reestim[i] = NULL; + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident == VON_MISES)) { + break; + } + } + + if (i < hsmarkov->nb_output_process) { + mean_direction = new double*[hsmarkov->nb_state]; + for (i = 0;i < hsmarkov->nb_state;i++) { + mean_direction[i] = new double[4]; + } + } + else { + mean_direction = NULL; + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((!marginal_distribution[i]) || ((hsmarkov->continuous_parametric_process[i]) && + ((hsmarkov->continuous_parametric_process[i]->ident == LINEAR_MODEL) || + (hsmarkov->continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)))) { + break; + } + } + + if (i < hsmarkov->nb_output_process) { + state_sequence_count = new int**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + state_sequence_count[i] = new int*[length[i]]; + for (j = 
0;j < length[i];j++) { + state_sequence_count[i][j] = new int[hsmarkov->nb_state]; + } + } + } + else { + state_sequence_count = NULL; + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((!marginal_distribution[i]) || ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL))) { + break; + } + } + + pioutput = new int*[nb_variable]; + proutput = new double*[nb_variable]; + + iter = 0; + nb_likelihood_decrease = 0; + + do { + previous_likelihood = likelihood; + likelihood = 0.; + + // computation of the number of generated state sequences + + if (min_nb_state_sequence + (int)::round(parameter * iter) < max_nb_state_sequence) { + nb_state_sequence = min_nb_state_sequence + (int)::round(parameter * iter); + } + else { + nb_state_sequence = max_nb_state_sequence; + } + +/* nb_state_sequence = max_nb_state_sequence - (int)::round((max_nb_state_sequence - min_nb_state_sequence) * + exp(-parameter * iter)); */ + + iter++; + + // initialization of the reestimation quantities + + chain_reestim->init(); + + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + for (j = 0;j < occupancy_nb_value[i];j++) { + complete_run[i]->frequency[j] = 0.; + } + + for (j = 0;j < occupancy_nb_value[i];j++) { + final_run[i]->frequency[j] = 0.; + } + + if (hsmarkov->type == EQUILIBRIUM) { + for (j = 0;j < occupancy_nb_value[i];j++) { + initial_run[i]->frequency[j] = 0.; + } + + for (j = 0;j < occupancy_nb_value[i];j++) { + single_run[i]->frequency[j] = 0.; + } + } + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (observation_reestim[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + observation_reestim[i][j]->frequency[k] = 0.; + } + } + } + } + + if (state_sequence_count) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < hsmarkov->nb_state;k++) { + 
state_sequence_count[i][j][k] = 0; + } + } + } + } + + for (i = 0;i < nb_sequence;i++) { // sequence i + + // forward recurrence + + for (j = 0;j < nb_variable;j++) { + switch (type[j]) { + case INT_VALUE : + pioutput[j] = int_sequence[i][j]; + break; + case REAL_VALUE : + proutput[j] = real_sequence[i][j]; + break; + } + } + + for (j = 0;j < length[i];j++) { // position j at sequence i + norm[j] = 0.; + + for (k = 0;k < hsmarkov->nb_state;k++) { // state k at position j and sequence i + + // computation of the observation probabilities + + observation[j][k] = 1.; + for (m = 0;m < hsmarkov->nb_output_process;m++) { // variable m + if (hsmarkov->categorical_process[m]) { + observation[j][k] *= hsmarkov->categorical_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else if (hsmarkov->discrete_parametric_process[m]) { + observation[j][k] *= hsmarkov->discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else { + if (((hsmarkov->continuous_parametric_process[m]->ident == GAMMA) || + (hsmarkov->continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (min_value[m] < min_interval[m] / 2)) { + switch (type[m]) { + case INT_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + min_interval[m]); + break; + case REAL_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + min_interval[m]); + break; + } + } + + else if (hsmarkov->continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (type[m]) { + case INT_VALUE : + residual = *pioutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->intercept + + hsmarkov->continuous_parametric_process[m]->observation[k]->slope * + (index_param_type == IMPLICIT_TYPE ? 
j : index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->intercept + + hsmarkov->continuous_parametric_process[m]->observation[k]->slope * + (index_param_type == IMPLICIT_TYPE ? j : index_parameter[i][j])); + break; + } + + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else if (hsmarkov->continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (j == 0) { + switch (type[m]) { + case INT_VALUE : + residual = *pioutput[m] - hsmarkov->continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - hsmarkov->continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (type[m]) { + case INT_VALUE : + residual = *pioutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->location + + hsmarkov->continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - hsmarkov->continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (hsmarkov->continuous_parametric_process[m]->observation[k]->location + + hsmarkov->continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - hsmarkov->continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else { + switch (type[m]) { + case INT_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - min_interval[m] / 2 , *pioutput[m] + min_interval[m] / 2); + break; + case REAL_VALUE : + observation[j][k] *= hsmarkov->continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - min_interval[m] / 
2 , *proutput[m] + min_interval[m] / 2); + break; + } + } + } + } + + switch (hsmarkov->sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if (j == 0) { + state_norm[k] = hsmarkov->initial[k]; + } + else { + state_norm[k] += state_in[j - 1][k] - forward1[j - 1][k]; + } + state_norm[k] *= observation[j][k]; + + norm[j] += state_norm[k]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (j == 0) { + forward1[j][k] = hsmarkov->initial[k]; + } + else { + forward1[j][k] = state_in[j - 1][k]; + } + forward1[j][k] *= observation[j][k]; + + norm[j] += forward1[j][k]; + break; + } + } + } + + if (norm[j] > 0.) { + for (k = 0;k < hsmarkov->nb_state;k++) { + switch (hsmarkov->sojourn_type[k]) { + case SEMI_MARKOVIAN : + state_norm[k] /= norm[j]; + break; + case MARKOVIAN : + forward1[j][k] /= norm[j]; + break; + } + } + + likelihood += log(norm[j]); + } + + else { + likelihood = D_INF; + break; + } + + for (k = 0;k < hsmarkov->nb_state;k++) { + + // case semi-Markovian state + + if (hsmarkov->sojourn_type[k] == SEMI_MARKOVIAN) { + occupancy = hsmarkov->state_process->sojourn_time[k]; + obs_product = 1.; + forward1[j][k] = 0.; + + if (j < length[i] - 1) { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < j + 1) { + forward1[j][k] += obs_product * occupancy->mass[m] * state_in[j - m][k]; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + forward1[j][k] += obs_product * occupancy->mass[m] * hsmarkov->initial[k]; + break; + case EQUILIBRIUM : + forward1[j][k] += obs_product * hsmarkov->forward[k]->mass[m] * hsmarkov->initial[k]; + break; + } + } + } + } + + else { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < j + 1) { + forward1[j][k] += obs_product * (1. 
- occupancy->cumul[m - 1]) * state_in[j - m][k]; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + forward1[j][k] += obs_product * (1. - occupancy->cumul[m - 1]) * + hsmarkov->initial[k]; + break; + case EQUILIBRIUM : + forward1[j][k] += obs_product * (1. - hsmarkov->forward[k]->cumul[m - 1]) * + hsmarkov->initial[k]; + break; + } + } + } + } + } + } + + if (j < length[i] - 1) { + for (k = 0;k < hsmarkov->nb_state;k++) { + state_in[j][k] = 0.; + for (m = 0;m < hsmarkov->nb_state;m++) { + state_in[j][k] += hsmarkov->transition[m][k] * forward1[j][m]; + } + } + } + + for (k = 0;k < nb_variable;k++) { + switch (type[k]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + } + + if (likelihood == D_INF) { + break; + } + +# ifdef DEBUG + for (j = 0;j < length[i];j++) { + cout << j << " : "; + for (k = 0;k < hsmarkov->nb_state;k++) { + cout << forward1[j][k] << " "; + } + cout << endl; + } + cout << endl; +# endif + + // backward passes + + for (j = 0;j < nb_state_sequence;j++) { + k = length[i] - 1; + pstate = state_seq + k; + for (m = 0;m < nb_variable;m++) { + if (type[m] == INT_VALUE) { + pioutput[m] = int_sequence[i][m] + k; + } + } + + cumul_computation(hsmarkov->nb_state , forward1[k] , cumul_backward); + *pstate = cumul_method(hsmarkov->nb_state , cumul_backward); + + // accumulation of the reestimation quantities of the observation distributions + + for (m = 0;m < hsmarkov->nb_output_process;m++) { + if (observation_reestim[m]) { + (observation_reestim[m][*pstate]->frequency[*pioutput[m]])++; + } + } + + if (state_sequence_count) { + (state_sequence_count[i][k][*pstate])++; + } + + do { + + // case semi-Markovian state + + if (hsmarkov->sojourn_type[*pstate] == SEMI_MARKOVIAN) { + occupancy = hsmarkov->state_process->sojourn_time[*pstate]; + obs_product = 1.; + + if (k < length[i] - 1) { + for (m = 1;m <= MIN(k + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[k - m + 1][*pstate] / 
norm[k - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < k + 1) { + backward[m] = obs_product * occupancy->mass[m] * state_in[k - m][*pstate] / + forward1[k][*pstate]; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + backward[m] = obs_product * occupancy->mass[m] * hsmarkov->initial[*pstate] / + forward1[k][*pstate]; + break; + case EQUILIBRIUM : + backward[m] = obs_product * hsmarkov->forward[*pstate]->mass[m] * hsmarkov->initial[*pstate] / + forward1[k][*pstate]; + break; + } + } + } + } + + else { + for (m = 1;m <= MIN(k + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[k - m + 1][*pstate] / norm[k - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < k + 1) { + backward[m] = obs_product * (1. - occupancy->cumul[m - 1]) * state_in[k - m][*pstate] / + forward1[k][*pstate]; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + backward[m] = obs_product * (1. - occupancy->cumul[m - 1]) * + hsmarkov->initial[*pstate] / forward1[k][*pstate]; + break; + case EQUILIBRIUM : + backward[m] = obs_product * (1. - hsmarkov->forward[*pstate]->cumul[m - 1]) * + hsmarkov->initial[*pstate] / forward1[k][*pstate]; + break; + } + } + } + } + + cumul_computation(m - 1 , backward + 1 , cumul_backward); + state_occupancy = 1 + cumul_method(m - 1 , cumul_backward); + +# ifdef DEBUG + sum = 0.; + for (n = 1;n < m;n++) { + sum += backward[n]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << k << " " << sum << endl; + } +# endif + + // accumulation of the reestimation quantities of the state occupancy distributions + + if (k < length[i] - 1) { + if (state_occupancy < k + 1) { + (complete_run[*pstate]->frequency[state_occupancy])++; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + (complete_run[*pstate]->frequency[state_occupancy])++; + break; + case EQUILIBRIUM : + (initial_run[*pstate]->frequency[state_occupancy])++; + break; + } + } + } + + else { + if (state_occupancy < k + 1) { + (final_run[*pstate]->frequency[state_occupancy])++; + } + + else { + switch (hsmarkov->type) { + case ORDINARY : + (final_run[*pstate]->frequency[state_occupancy])++; + break; + case EQUILIBRIUM : + (single_run[*pstate]->frequency[state_occupancy])++; + break; + } + } + } + + for (m = 1;m < state_occupancy;m++) { + pstate--; + *pstate = *(pstate + 1); + + // accumulation of the reestimation quantities of the observation distributions + + for (n = 0;n < hsmarkov->nb_output_process;n++) { + if (observation_reestim[n]) { + (observation_reestim[n][*pstate]->frequency[*--pioutput[n]])++; + } + } + + if (state_sequence_count) { + (state_sequence_count[i][k - m + 1][*pstate])++; + } + } + k -= (state_occupancy - 1); + + if (k == 0) { + break; + } + } + + k--; + for (m = 0;m < hsmarkov->nb_state;m++) { + backward[m] = hsmarkov->transition[m][*pstate] * forward1[k][m] / state_in[k][*pstate]; + } + cumul_computation(hsmarkov->nb_state , backward , cumul_backward); + *--pstate = cumul_method(hsmarkov->nb_state , cumul_backward); + + // accumulation of the reestimation quantities of the transition probabilities and + // the observation distributions + + (chain_reestim->transition[*pstate][*(pstate + 1)])++; + + for (m = 0;m < hsmarkov->nb_output_process;m++) { + if (observation_reestim[m]) { + (observation_reestim[m][*pstate]->frequency[*--pioutput[m]])++; + } + } + + if (state_sequence_count) { + 
(state_sequence_count[i][k][*pstate])++; + } + +# ifdef DEBUG + sum = 0.; + for (m = 0;m < hsmarkov->nb_state;m++) { + sum += backward[m]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. + DOUBLE_ERROR)) { + cout << "\nERROR: " << k << " " << sum << endl; + } +# endif + + } + while (k > 0); + + // accumulation of the reestimation quantities of the initial probabilities + + if (hsmarkov->type == ORDINARY) { + (chain_reestim->initial[*pstate])++; + } + } + } + + if (likelihood != D_INF) { + if (likelihood < previous_likelihood) { + nb_likelihood_decrease++; + } + else { + nb_likelihood_decrease = 0; + } + + // reestimation of the initial probabilities + + if (hsmarkov->type == ORDINARY) { + reestimation(hsmarkov->nb_state , chain_reestim->initial , + hsmarkov->initial , MIN_PROBABILITY , false); + } + + // reestimation of the transition probabilities + + for (i = 0;i < hsmarkov->nb_state;i++) { + reestimation(hsmarkov->nb_state , chain_reestim->transition[i] , + hsmarkov->transition[i] , MIN_PROBABILITY , false); + } + + // reestimation of the state occupancy distributions + + min_likelihood = 0.; + + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + occupancy = hsmarkov->state_process->sojourn_time[i]; + + complete_run[i]->nb_value_computation(); + complete_run[i]->offset_computation(); + complete_run[i]->nb_element_computation(); + +# ifdef DEBUG + cout << "\n" << STAT_label[STATL_STATE] << " " << i << " "; + + complete_run[i]->max_computation(); + complete_run[i]->mean_computation(); + complete_run[i]->variance_computation(); + + complete_run[i]->print(cout); +# endif + + if ((iter > STOCHASTIC_EXPLORATION_NB_ITER) && (estimator == COMPLETE_LIKELIHOOD)) { + final_run[i]->nb_value_computation(); + final_run[i]->offset_computation(); + final_run[i]->nb_element_computation(); + + switch (hsmarkov->type) { + case ORDINARY : { + if (final_run[i]->nb_element > 0.) 
{ + complete_run[i]->state_occupancy_estimation(final_run[i] , complete_run[i] , + occupancy_survivor , + censored_occupancy_survivor , false); + } + break; + } + + case EQUILIBRIUM : { + initial_run[i]->nb_value_computation(); + initial_run[i]->offset_computation(); + initial_run[i]->nb_element_computation(); + + single_run[i]->nb_value_computation(); + single_run[i]->offset_computation(); + single_run[i]->nb_element_computation(); + + prun[0] = complete_run[i]; + prun[1] = complete_run[i]; + bcomplete_run = new Reestimation(2 , prun); + + prun[0] = initial_run[i]; + prun[1] = final_run[i]; + prun[2] = single_run[i]; + censored_run = new Reestimation(3 , prun); + +# ifdef DEBUG + censored_run->print(cout); +# endif + + bcomplete_run->state_occupancy_estimation(censored_run , complete_run[i] , + occupancy_survivor , + censored_occupancy_survivor , false); + delete bcomplete_run; + delete censored_run; + break; + } + } + + if ((hsmarkov->type == EQUILIBRIUM) || (final_run[i]->nb_element > 0.)) { + complete_run[i]->nb_value_computation(); + complete_run[i]->offset_computation(); + complete_run[i]->nb_element_computation(); + +# ifdef DEBUG + complete_run[i]->max_computation(); + complete_run[i]->mean_computation(); + complete_run[i]->variance_computation(); + + complete_run[i]->print(cout); +# endif + + } + } + + complete_run[i]->max_computation(); + complete_run[i]->mean_computation(); + complete_run[i]->variance_computation(); + + if (iter <= EXPLORATION_NB_ITER) { + occupancy_likelihood = complete_run[i]->parametric_estimation(occupancy , 1 , true , + OCCUPANCY_THRESHOLD , geometric_poisson); + } + else { + occupancy_likelihood = complete_run[i]->type_parametric_estimation(occupancy , 1 , true , + OCCUPANCY_THRESHOLD , geometric_poisson); + } + +# ifdef DEBUG + if (i == 1) { + occupancy->print(cout); + } +# endif + + if (occupancy_likelihood == D_INF) { + min_likelihood = D_INF; + } + else { + occupancy->computation(complete_run[i]->nb_value , 
OCCUPANCY_THRESHOLD); + if (hsmarkov->type == EQUILIBRIUM) { + hsmarkov->forward[i]->copy(*occupancy); + hsmarkov->forward[i]->computation(*occupancy); + } + } + +# ifdef DEBUG + cout << STAT_word[STATW_STATE] << " " << i << " " << SEQ_word[SEQW_OCCUPANCY_DISTRIBUTION] << endl; + occupancy->ascii_print(cout); +# endif + + } + } + + if (hsmarkov->type == EQUILIBRIUM) { + hsmarkov->initial_probability_computation(); + } + + // reestimation of the observation distributions + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency , + hsmarkov->categorical_process[i]->observation[j]->mass , + MIN_PROBABILITY , false); + } + } + + else if (observation_reestim[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->nb_value_computation(); + observation_reestim[i][j]->offset_computation(); + observation_reestim[i][j]->nb_element_computation(); + observation_reestim[i][j]->max_computation(); + if ((hsmarkov->discrete_parametric_process[i]) || + (hsmarkov->continuous_parametric_process[i]->ident != ZERO_INFLATED_GAMMA)) { + observation_reestim[i][j]->mean_computation(); +// observation_reestim[i][j]->variance_computation(); + observation_reestim[i][j]->variance_computation(true); + } + } + + if (hsmarkov->discrete_parametric_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_likelihood = observation_reestim[i][j]->type_parametric_estimation(hsmarkov->discrete_parametric_process[i]->observation[j] , + 0 , true , OBSERVATION_THRESHOLD); + + if (observation_likelihood == D_INF) { + min_likelihood = D_INF; + } + else { + hsmarkov->discrete_parametric_process[i]->observation[j]->computation(marginal_distribution[i]->nb_value , + OBSERVATION_THRESHOLD); + + if (hsmarkov->discrete_parametric_process[i]->observation[j]->ident == BINOMIAL) { + for (k = 
hsmarkov->discrete_parametric_process[i]->observation[j]->nb_value;k < marginal_distribution[i]->nb_value;k++) { + hsmarkov->discrete_parametric_process[i]->observation[j]->mass[k] = 0.; + } + } + } + } + } + + else { + switch (hsmarkov->continuous_parametric_process[i]->ident) { + + case GAMMA : { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->gamma_estimation(hsmarkov->continuous_parametric_process[i]->observation[j] , iter); + } + break; + } + + case ZERO_INFLATED_GAMMA : { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->zero_inflated_gamma_estimation(hsmarkov->continuous_parametric_process[i]->observation[j] , iter); + } + break; + } + + case GAUSSIAN : { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->location = observation_reestim[i][j]->mean; + } + + if (common_dispersion) { + variance = 0.; + nb_element = 0; + + for (j = 0;j < hsmarkov->nb_state;j++) { + for (k = observation_reestim[i][j]->offset;k < observation_reestim[i][j]->nb_value;k++) { + diff = k - observation_reestim[i][j]->mean; + variance += observation_reestim[i][j]->frequency[k] * diff * diff; + } + + nb_element += observation_reestim[i][j]->nb_element; + } + + variance /= nb_element; +// variance /= (nb_element - 1); + + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(variance); + } + } + + else { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(observation_reestim[i][j]->variance); + if (hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion / + hsmarkov->continuous_parametric_process[i]->observation[j]->location < GAUSSIAN_MIN_VARIATION_COEFF) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = hsmarkov->continuous_parametric_process[i]->observation[j]->location * GAUSSIAN_MIN_VARIATION_COEFF; + } + } + 
} + break; + } + + case VON_MISES : { + for (j = 0;j < hsmarkov->nb_state;j++) { + observation_reestim[i][j]->mean_direction_computation(mean_direction[j]); + hsmarkov->continuous_parametric_process[i]->observation[j]->location = mean_direction[j][3]; + } + + if (common_dispersion) { + global_mean_direction = 0.; + nb_element = 0; + + for (j = 0;j < hsmarkov->nb_state;j++) { + global_mean_direction += observation_reestim[i][j]->nb_element * mean_direction[j][2]; + nb_element += observation_reestim[i][j]->nb_element; + } + concentration = von_mises_concentration_computation(global_mean_direction / nb_element); + + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = concentration; + } + } + + else { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->dispersion = von_mises_concentration_computation(mean_direction[j][2]); + } + } + break; + } + } + } + } + + else { + switch (hsmarkov->continuous_parametric_process[i]->ident) { + case GAMMA : + gamma_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i] , iter); + break; + case ZERO_INFLATED_GAMMA : + zero_inflated_gamma_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i] , iter); + break; + case GAUSSIAN : + gaussian_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + case VON_MISES : + von_mises_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + case LINEAR_MODEL : + if ((index_param_type != TIME) && (index_param_type != IMPLICIT_TYPE)) { + likelihood = D_INF; + stringstream error_message , correction_message; + error_message << SEQ_error[SEQR_INDEX_PARAMETER_TYPE] << ": shoud be "; + correction_message << SEQ_label[SEQL_TIME] << " or IMPLICIT" << endl; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + 
linear_model_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + case AUTOREGRESSIVE_MODEL : + autoregressive_model_estimation(state_sequence_count , i , + hsmarkov->continuous_parametric_process[i]); + break; + } + } + } + } + + if (os) { + *os << STAT_label[STATL_ITERATION] << " " << iter << " " + << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << likelihood + << " (" << nb_state_sequence << ")" << endl; + } + +# ifdef DEBUG + if (iter % 5 == 0) { + cout << *hsmarkov; + } +# endif + + } + while ((likelihood != D_INF) && ((iter <= STOCHASTIC_EXPLORATION_NB_ITER + 2) || + ((nb_iter == I_DEFAULT) && (iter < SEMI_MARKOV_NB_ITER) && + (((likelihood - previous_likelihood) / -likelihood > SEMI_MARKOV_LIKELIHOOD_DIFF) || + (min_likelihood == D_INF) || (nb_likelihood_decrease == 1))) || + ((nb_iter != I_DEFAULT) && (iter < nb_iter)))); + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << iter << " " << STAT_label[STATL_ITERATIONS] << endl; + + if (hsmarkov->type == EQUILIBRIUM) { + for (i = 0;i < hsmarkov->nb_state;i++) { + if (single_run[i]->nb_element > 0) { + *os << "\n" << SEQ_label[SEQL_BIASED] << " " << STAT_label[STATL_STATE] << " " << i + << " " << SEQ_label[SEQL_OCCUPANCY] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + } + } + } + } + + // reestimation of the initial probabilities + + if (hsmarkov->type == ORDINARY) { + reestimation(hsmarkov->nb_state , chain_reestim->initial , + hsmarkov->initial , MIN_PROBABILITY , true); + } + + // reestimation of the transition probabilities + + for (i = 0;i < hsmarkov->nb_state;i++) { + reestimation(hsmarkov->nb_state , chain_reestim->transition[i] , + hsmarkov->transition[i] , MIN_PROBABILITY , true); + } + + if (hsmarkov->type == EQUILIBRIUM) { + hsmarkov->initial_probability_computation(); + } + + for (i = 0;i < hsmarkov->nb_state;i++) { + if ((hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) && + (hsmarkov->state_process->sojourn_time[i]->mean == 1.)) { + 
hsmarkov->sojourn_type[i] = MARKOVIAN; + delete hsmarkov->state_process->sojourn_time[i]; + hsmarkov->state_process->sojourn_time[i] = NULL; + delete hsmarkov->forward[i]; + hsmarkov->forward[i] = NULL; + } + } + + // reestimation of the categorical observation distributions + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency , + hsmarkov->categorical_process[i]->observation[j]->mass , + MIN_PROBABILITY , true); + } + } + + else if (hsmarkov->discrete_parametric_process[i]) { + hsmarkov->discrete_parametric_process[i]->nb_value_computation(); + } + } + } + + // destruction of the data structures of the algorithm + + for (i = 0;i < max_length;i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] norm; + delete [] state_norm; + + for (i = 0;i < max_length;i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < max_length - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] backward; + delete [] cumul_backward; + + delete [] state_seq; + + delete chain_reestim; + + for (i = 0;i < hsmarkov->nb_state;i++) { + delete complete_run[i]; + } + delete [] complete_run; + + for (i = 0;i < hsmarkov->nb_state;i++) { + delete final_run[i]; + } + delete [] final_run; + + if (hsmarkov->type == EQUILIBRIUM) { + for (i = 0;i < hsmarkov->nb_state;i++) { + delete initial_run[i]; + } + delete [] initial_run; + + for (i = 0;i < hsmarkov->nb_state;i++) { + delete single_run[i]; + } + delete [] single_run; + } + + delete [] occupancy_nb_value; + + if (estimator != PARTIAL_LIKELIHOOD) { + delete [] occupancy_survivor; + delete [] censored_occupancy_survivor; + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (observation_reestim[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + delete observation_reestim[i][j]; + } + delete [] 
observation_reestim[i]; + } + } + delete [] observation_reestim; + + if (mean_direction) { + for (i = 0;i < hsmarkov->nb_state;i++) { + delete [] mean_direction[i]; + } + delete [] mean_direction; + } + + if (state_sequence_count) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + delete [] state_sequence_count[i][j]; + } + delete [] state_sequence_count[i]; + } + delete [] state_sequence_count; + } + + delete [] pioutput; + delete [] proutput; + + if (likelihood == D_INF) { + delete hsmarkov; + hsmarkov = NULL; + error.update(STAT_error[STATR_ESTIMATION_FAILURE]); + } + + else { + if (state_sequence) { + hsmarkov->semi_markov_data = new SemiMarkovData(*this , ADD_STATE_VARIABLE , + (hsmarkov->type == EQUILIBRIUM ? true : false)); + seq = hsmarkov->semi_markov_data; + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (((hsmarkov->discrete_parametric_process[i]) || (hsmarkov->continuous_parametric_process[i])) && + (seq->characteristics[i + 1])) { + delete seq->characteristics[i + 1]; + seq->characteristics[i + 1] = NULL; + } + } + + hsmarkov->forward_backward(*seq); + + hsmarkov->create_cumul(); + hsmarkov->log_computation(); + hsmarkov->viterbi(*seq); + hsmarkov->remove_cumul(); + + seq->min_value_computation(0); + seq->max_value_computation(0); + seq->build_marginal_frequency_distribution(0); + seq->build_characteristic(0 , true , (hsmarkov->type == EQUILIBRIUM ? true : false)); + + seq->build_transition_count(hsmarkov); + seq->build_observation_frequency_distribution(hsmarkov->nb_state); + seq->build_observation_histogram(hsmarkov->nb_state); + + // computation of the state occupancy distributions + + for (i = 0;i < hsmarkov->nb_state;i++) { + if (hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + hsmarkov->state_process->sojourn_time[i]->computation((seq->characteristics[0] ? 
seq->characteristics[0]->sojourn_time[i]->nb_value : 1) , + OCCUPANCY_THRESHOLD); + if (hsmarkov->stype[i] == RECURRENT) { + if (hsmarkov->type == ORDINARY) { + hsmarkov->forward[i]->copy(*(hsmarkov->state_process->sojourn_time[i])); + } + hsmarkov->forward[i]->computation(*(hsmarkov->state_process->sojourn_time[i])); + } + } + } + + // computation of the mixtures of observation distributions (weights deduced from the restoration) + + weight = NULL; + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->categorical_process[i]) || (hsmarkov->discrete_parametric_process[i]) || + ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL))) { + weight = seq->weight_computation(); + break; + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + hsmarkov->categorical_process[i]->restoration_weight = new Distribution(*weight); + hsmarkov->categorical_process[i]->restoration_mixture = hsmarkov->categorical_process[i]->mixture_computation(hsmarkov->categorical_process[i]->restoration_weight); + } + + else if (hsmarkov->discrete_parametric_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->discrete_parametric_process[i]->observation[j]->cumul_computation(); + } + + hsmarkov->discrete_parametric_process[i]->restoration_weight = new Distribution(*weight); + hsmarkov->discrete_parametric_process[i]->restoration_mixture = hsmarkov->discrete_parametric_process[i]->mixture_computation(hsmarkov->discrete_parametric_process[i]->restoration_weight); + } + + else if ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) { + hsmarkov->continuous_parametric_process[i]->restoration_weight = new Distribution(*weight); + } + 
} + + delete weight; + + if ((os) && (seq->characteristics[0])) { + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << ": " << seq->restoration_likelihood; + + for (i = 0;i < nb_variable;i++) { + if (type[i] == REAL_VALUE) { + break; + } + } + if (i == nb_variable) { + *os << " | " << hsmarkov->SemiMarkov::likelihood_computation(*seq); + } + *os << endl; + } + } + + else { + if (hsmarkov->type == ORDINARY) { + for (i = 0;i < hsmarkov->nb_state;i++) { + if ((hsmarkov->sojourn_type[i] == SEMI_MARKOVIAN) && (hsmarkov->stype[i] == RECURRENT)) { + hsmarkov->forward[i]->copy(*(hsmarkov->state_process->sojourn_time[i])); + hsmarkov->forward[i]->computation(*(hsmarkov->state_process->sojourn_time[i])); + } + } + } + + hsmarkov->semi_markov_data = new SemiMarkovData(*this , SEQUENCE_COPY , + (hsmarkov->type == EQUILIBRIUM ? true : false)); + seq = hsmarkov->semi_markov_data; + if (seq->type[0] == STATE) { + seq->state_variable_init(INT_VALUE); + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (((hsmarkov->discrete_parametric_process[i]) || (hsmarkov->continuous_parametric_process[i])) && + (seq->characteristics[i])) { + delete seq->characteristics[i]; + seq->characteristics[i] = NULL; + } + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->categorical_process[i]->observation[j]->cumul_computation(); + + hsmarkov->categorical_process[i]->observation[j]->max_computation(); +// hsmarkov->categorical_process[i]->observation[j]->mean_computation(); +// hsmarkov->categorical_process[i]->observation[j]->variance_computation(); + } + } + } + + // computation of the log-likelihood and the characteristic distributions of the model + + seq->likelihood = hsmarkov->likelihood_computation(*this , seq->posterior_probability); + + hsmarkov->component_computation(); + hsmarkov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false); + + // 
computation of the mixtures of observation distributions (theoretical weights) + + weight = NULL; + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->categorical_process[i]) || (hsmarkov->discrete_parametric_process[i]) || + ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL))) { + switch (hsmarkov->type) { + case ORDINARY : + weight = hsmarkov->state_process->weight_computation(); + break; + case EQUILIBRIUM : + weight = new Distribution(hsmarkov->nb_state , hsmarkov->initial); + break; + } + break; + } + } + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if (hsmarkov->categorical_process[i]) { + hsmarkov->categorical_process[i]->weight = new Distribution(*weight); + hsmarkov->categorical_process[i]->mixture = hsmarkov->categorical_process[i]->mixture_computation(hsmarkov->categorical_process[i]->weight); + } + + else if (hsmarkov->discrete_parametric_process[i]) { + hsmarkov->discrete_parametric_process[i]->weight = new Distribution(*weight); + hsmarkov->discrete_parametric_process[i]->mixture = hsmarkov->discrete_parametric_process[i]->mixture_computation(hsmarkov->discrete_parametric_process[i]->weight); + } + + else if ((hsmarkov->continuous_parametric_process[i]) && + (hsmarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (hsmarkov->continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) { + hsmarkov->continuous_parametric_process[i]->weight = new Distribution(*weight); + } + } + + delete weight; + + // update of the sample sizes for the computation of the confidence intervals on the slopes and + // the correlation coefficients (linear observation model) and the autoregressive coefficients (autoregressive models) + + for (i = 0;i < hsmarkov->nb_output_process;i++) { + if ((hsmarkov->continuous_parametric_process[i]->ident == LINEAR_MODEL) || + 
(hsmarkov->continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)) { + for (j = 0;j < hsmarkov->nb_state;j++) { + hsmarkov->continuous_parametric_process[i]->observation[j]->sample_size /= nb_state_sequence; + } + } + } + + if ((os) && (state_sequence) && (seq->nb_sequence <= POSTERIOR_PROBABILITY_NB_SEQUENCE)) { + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] << endl; + for (i = 0;i < seq->nb_sequence;i++) { + *os << SEQ_label[SEQL_SEQUENCE] << " " << seq->identifier[i] << ": " + << seq->posterior_probability[i]; + + if (hsmarkov->nb_component == hsmarkov->nb_state) { + *os << " | " << SEQ_label[SEQL_STATE_BEGIN] << ": "; + + pstate = seq->int_sequence[i][0] + 1; + if (seq->index_parameter) { + for (j = 1;j < seq->length[i];j++) { + if (*pstate != *(pstate - 1)) { + *os << seq->index_parameter[i][j] << ", "; + } + pstate++; + } + } + + else { + for (j = 1;j < seq->length[i];j++) { + if (*pstate != *(pstate - 1)) { + *os << j << ", "; + } + pstate++; + } + } + } + + *os << endl; + } + } + } + } + + return hsmarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a hidden semi-Markov chain using the MCEM algorithm. 
+ * + * \param[in] error reference to a StatError object, + * \param[in] os stream for displaying intermediate estimation results, + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] nb_state number of states (between 2 and NB_STATE), + * \param[in] left_right flag on the Markov chain structure, + * \param[in] occupancy_mean mean state occupancy (> 1, or D_DEFAULT for MAX(length_distribution->mean , OCCUPANCY_MEAN)), + * \param[in] geometric_poisson flag on the estimation of Poisson geometric state occupancy distributions, + * \param[in] common_dispersion flag on the common dispersion parameter (continuous observation processes), + * \param[in] min_nb_state_sequence minimum number of generated sequences, + * \param[in] max_nb_state_sequence maximum number of generated sequences, + * \param[in] parameter parameter for defining the number of generated sequences, + * \param[in] estimator estimator type for the reestimation of the state occupancy distributions + * (complete or partial likelihood), + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] state_sequence flag on the computation of the restored state sequences, + * \param[in] nb_iter number of iterations. + * + * \return estimated HiddenSemiMarkov object (NULL if an argument check fails). 
+ */ +/*--------------------------------------------------------------*/ + +HiddenSemiMarkov* MarkovianSequences::hidden_semi_markov_stochastic_estimation(StatError &error , ostream *os , + process_type itype , int nb_state , + bool left_right , double occupancy_mean , + bool geometric_poisson , bool common_dispersion , + int min_nb_state_sequence , + int max_nb_state_sequence , double parameter , + censoring_estimator estimator , + bool counting_flag , bool state_sequence , + int nb_iter) const + +{ + bool status = true; + int i; + int nb_value[SEQUENCE_NB_VARIABLE]; + double proba , mean , variance; + HiddenSemiMarkov *ihsmarkov , *hsmarkov; + + + hsmarkov = NULL; + error.init(); + + if ((nb_state < 2) || (nb_state > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + if ((occupancy_mean != D_DEFAULT) && (occupancy_mean <= 1.)) { + status = false; + error.update(SEQ_error[SEQR_OCCUPANCY]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (status) { + for (i = 0;i < nb_variable;i++) { + if (marginal_distribution[i]) { + nb_value[i] = marginal_distribution[i]->nb_value; + } + else { + nb_value[i] = I_DEFAULT; + } + } + + ihsmarkov = new HiddenSemiMarkov(itype , nb_state , nb_variable , nb_value); + + // initialization of the Markov chain parameters + + ihsmarkov->init(left_right , 0.); + + // initialization of the state occupancy distributions + + if (occupancy_mean == D_DEFAULT) { + occupancy_mean = MAX(length_distribution->mean , OCCUPANCY_MEAN); + } + +# ifdef DEBUG + 
assert(ihsmarkov->sojourn_type == NULL); +# endif + + ihsmarkov->sojourn_type = new state_sojourn_type[nb_state]; + ihsmarkov->state_process->absorption = new double[nb_state]; + ihsmarkov->state_process->sojourn_time = new DiscreteParametric*[nb_state]; + ihsmarkov->forward = new Forward*[nb_state]; + + for (i = 0;i < nb_state;i++) { + if (ihsmarkov->stype[i] != ABSORBING) { + ihsmarkov->sojourn_type[i] = SEMI_MARKOVIAN; + ihsmarkov->state_process->absorption[i] = 0.; + proba = 1. / occupancy_mean; + ihsmarkov->state_process->sojourn_time[i] = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 , + I_DEFAULT , 1. , proba , + OCCUPANCY_THRESHOLD); + + if (ihsmarkov->stype[i] == RECURRENT) { + ihsmarkov->forward[i] = new Forward(*(ihsmarkov->state_process->sojourn_time[i]) , + ihsmarkov->state_process->sojourn_time[i]->alloc_nb_value); + } + else { + ihsmarkov->forward[i] = NULL; + } + } + + else { + ihsmarkov->sojourn_type[i] = MARKOVIAN; + ihsmarkov->state_process->absorption[i] = 1.; + ihsmarkov->state_process->sojourn_time[i] = NULL; + ihsmarkov->forward[i] = NULL; + } + } + + // initialization of the observation distributions + + for (i = 0;i < ihsmarkov->nb_output_process;i++) { + if (ihsmarkov->categorical_process[i]) { + ihsmarkov->categorical_process[i]->init(); + } + + else if (ihsmarkov->discrete_parametric_process[i]) { + ihsmarkov->discrete_parametric_process[i]->init(); + } + + else { + mean = mean_computation(i); + variance = variance_computation(i , mean); + + ihsmarkov->continuous_parametric_process[i]->init(GAUSSIAN , min_value[i] , max_value[i] , + mean , variance); + } + } + + hsmarkov = hidden_semi_markov_stochastic_estimation(error , os , *ihsmarkov , geometric_poisson , + common_dispersion , min_nb_state_sequence , + max_nb_state_sequence , parameter , estimator , + counting_flag , state_sequence , nb_iter); + delete ihsmarkov; + } + + return hsmarkov; +} + + +}; // namespace sequence_analysis diff --git 
a/src/cpp/sequence_analysis/hsmc_algorithms2.cpp b/src/cpp/sequence_analysis/hsmc_algorithms2.cpp new file mode 100644 index 0000000..862ec57 --- /dev/null +++ b/src/cpp/sequence_analysis/hsmc_algorithms2.cpp @@ -0,0 +1,7321 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "hidden_semi_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state sequence entropies using the forward-backward algorithm. 
+ * + * \param[in] seq reference on a SemiMarkovData object, + */ +/*--------------------------------------------------------------*/ + +void HiddenSemiMarkov::forward_backward(SemiMarkovData &seq) const + +{ + bool posterior_state_probability_flag; + int i , j , k , m , n; + int **pioutput; + double seq_likelihood , obs_product , residual , buff , sum , **observation , + *norm , *state_norm , **forward1 , **state_in , *transition_predicted , + *occupancy_predicted , **state_entropy , **predicted_entropy , **proutput; + DiscreteParametric *occupancy; + +# ifdef MESSAGE + double entropy , **backward , **backward1 , *auxiliary , *occupancy_auxiliary , + **transition_entropy , **occupancy_entropy; +# endif + +# ifdef DEBUG + double *backward0 = new double[nb_state]; +# endif + // initializations + + seq.entropy = new double[seq.nb_sequence]; + seq.nb_state_sequence = new double[seq.nb_sequence]; + + posterior_state_probability_flag = parallel_initial_state(); + if (posterior_state_probability_flag) { + seq.posterior_state_probability = new double[seq.nb_sequence]; + } + + observation = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + observation[i] = new double[nb_state]; + } + + norm = new double[seq.max_length]; + state_norm = new double[nb_state]; + + forward1 = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + forward1[i] = new double[nb_state]; + } + + state_in = new double*[seq.max_length - 1]; + for (i = 0;i < seq.max_length - 1;i++) { + state_in[i] = new double[nb_state]; + } + + transition_predicted = new double[nb_state]; + occupancy_predicted = new double[seq.max_length + 1]; + + state_entropy = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + state_entropy[i] = new double[nb_state]; + } + + predicted_entropy = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + predicted_entropy[i] = new double[nb_state]; + } + +# ifdef MESSAGE + backward = new double*[seq.max_length]; + for (i 
= 0;i < seq.max_length;i++) { + backward[i] = new double[nb_state]; + } + + backward1 = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + backward1[i] = new double[nb_state]; + } + + auxiliary = new double[nb_state]; + occupancy_auxiliary = new double[seq.max_length + 1]; + + transition_entropy = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + transition_entropy[i] = new double[nb_state]; + } + + occupancy_entropy = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + switch (sojourn_type[i]) { + case SEMI_MARKOVIAN : + occupancy = state_process->sojourn_time[i]; + occupancy_entropy[i] = new double[MIN(seq.max_length , occupancy->nb_value)]; + break; + case MARKOVIAN : + occupancy_entropy[i] = NULL; + break; + } + } +# endif + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + seq.sample_entropy = 0.; + + for (i = 0;i < seq.nb_sequence;i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + + // forward recurrence + + seq_likelihood = 0.; + for (j = 0;j < seq.length[i];j++) { + norm[j] = 0.; + + for (k = 0;k < nb_state;k++) { + + // computation of the observation probabilities + + observation[j][k] = 1.; + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + observation[j][k] *= categorical_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + observation[j][k] *= discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + observation[j][k] *= 
continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else if (continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + } + + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (j == 0) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - 
continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + observation[j][k] *= continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + } + } + + switch (sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if (j == 0) { + state_norm[k] = initial[k]; + } + else { + state_norm[k] += state_in[j - 1][k] - forward1[j - 1][k]; + } + state_norm[k] *= observation[j][k]; + + norm[j] += state_norm[k]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (j == 0) { + forward1[j][k] = initial[k]; + state_entropy[j][k] = 0.; + } + else { + forward1[j][k] = state_in[j - 1][k]; + state_entropy[j][k] = predicted_entropy[j - 1][k]; + } + forward1[j][k] *= observation[j][k]; + + norm[j] += forward1[j][k]; + break; + } + } + } + + if (norm[j] > 0.) { + for (k = 0;k < nb_state;k++) { + switch (sojourn_type[k]) { + case SEMI_MARKOVIAN : + state_norm[k] /= norm[j]; + break; + case MARKOVIAN : + forward1[j][k] /= norm[j]; + break; + } + } + + seq_likelihood += log(norm[j]); + } + + else { + seq_likelihood = D_INF; + break; + } + + for (k = 0;k < nb_state;k++) { + + // case semi-Markovian state + + if (sojourn_type[k] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[k]; + obs_product = 1.; + forward1[j][k] = 0.; + + if (j < seq.length[i] - 1) { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) 
{ + break; + } + + if (m < j + 1) { + occupancy_predicted[m] = obs_product * occupancy->mass[m] * state_in[j - m][k]; +// forward1[j][k] += obs_product * occupancy->mass[m] * state_in[j - m][k]; + } + + else { + switch (type) { + case ORDINARY : + occupancy_predicted[m] = obs_product * occupancy->mass[m] * initial[k]; +// forward1[j][k] += obs_product * occupancy->mass[m] * initial[k]; + break; + case EQUILIBRIUM : + occupancy_predicted[m] = obs_product * forward[k]->mass[m] * initial[k]; +// forward1[j][k] += obs_product * forward[k]->mass[m] * initial[k]; + break; + } + } + + forward1[j][k] += occupancy_predicted[m]; + } + } + + else { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + obs_product *= observation[j - m + 1][k] / norm[j - m + 1]; + if (obs_product == 0.) { + break; + } + + if (m < j + 1) { + occupancy_predicted[m] = obs_product * (1. - occupancy->cumul[m - 1]) * state_in[j - m][k]; +// forward1[j][k] += obs_product * (1. - occupancy->cumul[m - 1]) * state_in[j - m][k]; + } + + else { + switch (type) { + case ORDINARY : + occupancy_predicted[m] = obs_product * (1. - occupancy->cumul[m - 1]) * initial[k]; +// forward1[j][k] += obs_product * (1. - occupancy->cumul[m - 1]) * initial[k]; + break; + case EQUILIBRIUM : + occupancy_predicted[m] = obs_product * (1. - forward[k]->cumul[m - 1]) * initial[k]; +// forward1[j][k] += obs_product * (1. - forward[k]->cumul[m - 1]) * initial[k]; + break; + } + } + + forward1[j][k] += occupancy_predicted[m]; + } + } + + state_entropy[j][k] = 0.; + + if (forward1[j][k] > 0.) { + for (n = 1;n < m;n++) { + buff = occupancy_predicted[n] / forward1[j][k]; + if (buff > 0.) { + if (n < j + 1) { + state_entropy[j][k] += buff * (predicted_entropy[j - n][k] - log(buff)); + } + else { + state_entropy[j][k] -= buff * log(buff); + } + } + } + + if (state_entropy[j][k] < 0.) 
{ + state_entropy[j][k] = 0.; + } + } + } + } + + if (j < seq.length[i] - 1) { + for (k = 0;k < nb_state;k++) { + state_in[j][k] = 0.; + for (m = 0;m < nb_state;m++) { + transition_predicted[m] = transition[m][k] * forward1[j][m]; + state_in[j][k] += transition_predicted[m]; +// state_in[j][k] += transition[m][k] * forward1[j][m]; + } + + predicted_entropy[j][k] = 0.; + + if (state_in[j][k] > 0.) { + for (m = 0;m < nb_state;m++) { + buff = transition_predicted[m] / state_in[j][k]; + if (buff > 0.) { + predicted_entropy[j][k] += buff * (state_entropy[j][m] - log(buff)); + } + } + + if (predicted_entropy[j][k] < 0.) { + predicted_entropy[j][k] = 0.; + } + } + } + } + + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + } + + if (seq_likelihood != D_INF) { + seq.entropy[i] = 0.; + j = seq.length[i] - 1; + for (k = 0;k < nb_state;k++) { + if (forward1[j][k] > 0.) { + seq.entropy[i] += forward1[j][k] * (state_entropy[j][k] - log(forward1[j][k])); + } + } + seq.sample_entropy += seq.entropy[i]; + +/* for (j = 0;j < nb_state;j++) { + if (sojourn_type[j] == SEMI_MARKOVIAN) { + for (k = 0;k < seq.length[i];k++) { + state_entropy[k][j] = 0.; + } + } + } */ + + // backward recurrence + +# ifdef MESSAGE + entropy = 0.; + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]--; + break; + case REAL_VALUE : + proutput[j]--; + break; + } + } + + for (j = 0;j < nb_state;j++) { + for (k = 0;k < nb_state;k++) { + transition_entropy[j][k] = 0.; + } + } + + for (j = 0;j < nb_state;j++) { + if (sojourn_type[j] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[j]; + for (k = occupancy->offset;k < MIN(seq.length[i] , occupancy->nb_value);k++) { + occupancy_entropy[j][k] = 0.; + } + } + } + + j = seq.length[i] - 1; + for (k = 0;k < nb_state;k++) { + backward[j][k] = forward1[j][k]; + backward1[j][k] = 
backward[j][k]; + + if (backward[j][k] > 0.) { + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + if (categorical_process[m]->observation[k]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(categorical_process[m]->observation[k]->mass[*pioutput[m]]); + } + } + + else if (discrete_parametric_process[m]) { + if (discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]]); + } + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1])); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1])); + break; + } + } + + else if (continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
j : seq.index_parameter[i][j])); + break; + } + + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual)); + } + + else if (continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (j == 0) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual)); + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2)); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2)); + break; + } + } + } + } + } + } + + for (j = seq.length[i] - 2;j >= 0;j--) { + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]--; + break; + case REAL_VALUE : + proutput[k]--; + break; + } + } 
+ + for (k = 0;k < nb_state;k++) { + auxiliary[k] = 0.; + + switch (sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[k]; + obs_product = 1.; + + for (m = 1;m < MIN(seq.length[i] - j , occupancy->nb_value);m++) { + obs_product *= observation[j + m][k] / norm[j + m]; + if (obs_product == 0.) { + break; + } + + occupancy_auxiliary[m] = 0.; + + if (backward1[j + m][k] > 0.) { +// if (forward1[j + m][k] > 0.) { + if (m < seq.length[i] - j - 1) { + buff = backward1[j + m][k] * obs_product * occupancy->mass[m] / + forward1[j + m][k]; + occupancy_auxiliary[m] = buff * state_in[j][k]; + occupancy_entropy[k][m] += occupancy_auxiliary[m]; + +/* if (occupancy->mass[m] > 0.) { + entropy -= occupancy_auxiliary[m] * log(occupancy->mass[m]); + } */ + } + + else { + buff = obs_product * (1. - occupancy->cumul[m - 1]); + occupancy_auxiliary[m] = buff * state_in[j][k]; + if (occupancy->cumul[m - 1] < 1.) { + entropy -= occupancy_auxiliary[m] * log(1. - occupancy->cumul[m - 1]); + } + } + + auxiliary[k] += buff; + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (backward1[j + 1][k] > 0.) { +// if (forward1[j + 1][k] > 0.) { + auxiliary[k] = backward1[j + 1][k] / state_in[j][k]; + +/* auxiliary[k] = backward1[j + 1][k] * observation[j + 1][k] / + (forward1[j + 1][k] * norm[j + 1]); */ + } + break; + } + } + } + + for (k = 0;k < nb_state;k++) { + backward1[j][k] = 0.; + + for (m = 0;m < nb_state;m++) { + buff = auxiliary[m] * transition[k][m] * forward1[j][k]; + backward1[j][k] += buff; + transition_entropy[k][m] += buff; + +/* if (transition[k][m] > 0.) { + entropy -= buff * log(transition[k][m]); + } */ + } + + switch (sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + backward[j][k] = backward[j + 1][k] + backward1[j][k] - auxiliary[k] * state_in[j][k]; + if (backward[j][k] < 0.) { + backward[j][k] = 0.; + } + if (backward[j][k] > 1.) 
{ + backward[j][k] = 1.; + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward[j][k] = backward1[j][k]; + break; + } + } + + if (backward[j][k] > 0.) { + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + if (categorical_process[m]->observation[k]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(categorical_process[m]->observation[k]->mass[*pioutput[m]]); + } + } + + else if (discrete_parametric_process[m]) { + if (discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(discrete_parametric_process[m]->observation[k]->mass[*pioutput[m]]); + } + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1])); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1])); + break; + } + } + + else if (continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
j : seq.index_parameter[i][j])); + break; + } + + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual)); + } + + else if (continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (j == 0) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual)); + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2)); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2)); + break; + } + } + } + } + } + } + } + + if (posterior_state_probability_flag) { + seq.posterior_state_probability[i] = 0.; + for (j = 0;j < nb_state;j++) { + if (backward[0][j] > seq.posterior_state_probability[i]) { + 
seq.posterior_state_probability[i] = backward[0][j]; + } + } + } + + for (j = 0;j < nb_state;j++) { + if (initial[j] > 0.) { + entropy -= backward[0][j] * log(initial[j]); + } + } + + for (j = 0;j < nb_state;j++) { + for (k = 0;k < nb_state;k++) { + if (transition[j][k] > 0.) { + entropy -= transition_entropy[j][k] * log(transition[j][k]); + } + } + } + + for (j = 0;j < nb_state;j++) { + if (sojourn_type[j] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[j]; + + if (initial[j] > 0.) { + obs_product = 1.; + +# ifdef DEBUG + backward0[j] = 0.; +# endif + + for (k = 1;k < MIN(seq.length[i] + 1 , occupancy->nb_value);k++) { + obs_product *= observation[k - 1][j] / norm[k - 1]; + if (obs_product == 0.) { + break; + } + + occupancy_auxiliary[k] = 0.; + + if (backward1[k - 1][j] > 0.) { +// if (forward1[k - 1][j] > 0.) { + if (k < seq.length[i]) { + switch (type) { + + case ORDINARY : { + occupancy_auxiliary[k] = backward1[k - 1][j] * obs_product * occupancy->mass[k] * + initial[j] / forward1[k - 1][j]; + occupancy_entropy[j][k] += occupancy_auxiliary[k]; + +/* if (occupancy->mass[k] > 0.) { + entropy -= occupancy_auxiliary[k] * log(occupancy->mass[k]); + } */ + break; + } + + case EQUILIBRIUM : { + occupancy_auxiliary[k] = backward1[k - 1][j] * obs_product * forward[j]->mass[k] * + initial[j] / forward1[k - 1][j]; + if (forward[j]->mass[k] > 0.) { + entropy -= occupancy_auxiliary[k] * log(forward[j]->mass[k]); + } + break; + } + } + } + + else { + switch (type) { + + case ORDINARY : { + occupancy_auxiliary[k] = obs_product * (1. - occupancy->cumul[k - 1]) * initial[j]; + if (occupancy->cumul[k - 1] < 1.) { + entropy -= occupancy_auxiliary[k] * log(1. - occupancy->cumul[k - 1]); + } + break; + } + + case EQUILIBRIUM : { + occupancy_auxiliary[k] = obs_product * (1. - forward[j]->cumul[k - 1]) * initial[j]; + if (forward[j]->cumul[k - 1] < 1.) { + entropy -= occupancy_auxiliary[k] * log(1. 
- forward[j]->cumul[k - 1]); + } + break; + } + } + } + +# ifdef DEBUG + backward0[j] += occupancy_auxiliary[k]; +# endif + + } + } + +# ifdef DEBUG + cout << j << " " << backward0[j] << " " << backward0[j] << endl; +# endif + } + + for (k = occupancy->offset;k < MIN(seq.length[i] , occupancy->nb_value);k++) { + if (occupancy->mass[k] > 0.) { + entropy -= occupancy_entropy[j][k] * log(occupancy->mass[k]); + } + } + } + } + + entropy += seq_likelihood; + + if ((entropy < seq.entropy[i] - DOUBLE_ERROR) || (entropy > seq.entropy[i] + DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " " << seq.entropy[i] << " " << entropy << endl; + } +# endif + + // computation of the number of state sequences + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + + // forward recurrence + + for (j = 0;j < seq.length[i];j++) { + for (k = 0;k < nb_state;k++) { + + // computation of the indicator functions of the observation probabilities + + if (observation[j][k] > 0.) { + observation[j][k] = 1.; + } + + forward1[j][k] = 0.; + + switch (sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[k]; + + if (j < seq.length[i] - 1) { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + if (observation[j - m + 1][k] == 0.) { + break; + } + + if (m < j + 1) { + if (occupancy->mass[m] > 0.) { + forward1[j][k] += state_in[j - m][k]; + } + } + + else { + if (initial[k] > 0.) { + switch (type) { + + case ORDINARY : { + if (occupancy->mass[m] > 0.) { + forward1[j][k]++; + } + break; + } + + case EQUILIBRIUM : { + if (forward[k]->mass[m] > 0.) { + forward1[j][k]++; + } + break; + } + } + } + } + } + } + + else { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + if (observation[j - m + 1][k] == 0.) { + break; + } + + if (m < j + 1) { + if (1. 
- occupancy->cumul[m - 1] > 0.) { + forward1[j][k] += state_in[j - m][k]; + } + } + + else { + if (initial[k] > 0.) { + switch (type) { + + case ORDINARY : { + if (1. - occupancy->cumul[m - 1] > 0.) { + forward1[j][k]++; + } + break; + } + + case EQUILIBRIUM : { + if (1. - forward[k]->cumul[m - 1] > 0.) { + forward1[j][k]++; + } + break; + } + } + } + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (observation[j][k] == 1.) { + if (j == 0) { + if (initial[k] > 0.) { + forward1[j][k] = 1.; + } + } + else { + forward1[j][k] = state_in[j - 1][k]; + } + } + break; + } + } + } + + if (j < seq.length[i] - 1) { + for (k = 0;k < nb_state;k++) { + state_in[j][k] = 0.; + for (m = 0;m < nb_state;m++) { + if (transition[m][k] > 0.) { + state_in[j][k] += forward1[j][m]; + } + } + } + } + + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + } + + seq.nb_state_sequence[i] = 0.; + j = seq.length[i] - 1; + for (k = 0;k < nb_state;k++) { + seq.nb_state_sequence[i] += forward1[j][k]; + } + } + } + + for (i = 0;i < seq.max_length;i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] norm; + delete [] state_norm; + + for (i = 0;i < seq.max_length;i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < seq.max_length - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] transition_predicted; + delete [] occupancy_predicted; + + for (i = 0;i < seq.max_length;i++) { + delete [] state_entropy[i]; + } + delete [] state_entropy; + + for (i = 0;i < seq.max_length;i++) { + delete [] predicted_entropy[i]; + } + delete [] predicted_entropy; + +# ifdef DEBUG + delete [] backward0; +# endif + +# ifdef MESSAGE + for (i = 0;i < seq.max_length;i++) { + delete [] backward[i]; + } + delete [] backward; + + for (i = 0;i < seq.max_length;i++) { + delete [] backward1[i]; + } + delete [] 
backward1; + + delete [] auxiliary; + delete [] occupancy_auxiliary; + + for (i = 0;i < nb_state;i++) { + delete [] transition_entropy[i]; + } + delete [] transition_entropy; + + for (i = 0;i < nb_state;i++) { + delete [] occupancy_entropy[i]; + } + delete [] occupancy_entropy; +# endif + + delete [] pioutput; + delete [] proutput; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm. + * + * \param[in] seq reference to a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] plot_set pointer to a MultiPlotSet object, + * \param[in] output output type, + * \param[in] format output format (ASCII/SPREADSHEET/GNUPLOT/PLOT), + * \param[in] max_marginal_entropy reference to the maximum marginal entropy, + * \param[out] entropy1 reference to the entropy (for the plots). + * + * \return log-likelihood for the observed sequence. + */ +/*--------------------------------------------------------------*/ + +double HiddenSemiMarkov::forward_backward(MarkovianSequences &seq , int index , ostream *os , + MultiPlotSet *plot_set , state_profile output , + output_format format , double &max_marginal_entropy , + double &entropy1) const + +{ + int i , j , k , m; + int *pstate , **pioutput; + double seq_likelihood , state_seq_likelihood , obs_product , residual , entropy2 , buff , sum , + backward_max , **observation , *norm , *state_norm , **forward1 , **state_in , + **backward , **backward1 , *auxiliary , *occupancy_auxiliary , **backward_output , + *transition_predicted , *occupancy_predicted , **state_entropy , **predicted_entropy , + **transition_entropy , **occupancy_entropy , *partial_entropy , *conditional_entropy , + *marginal_entropy , **proutput; + DiscreteParametric *occupancy; + + + // initializations + + observation = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + observation[i] = 
new double[nb_state]; + } + + norm = new double[seq.length[index]]; + state_norm = new double[nb_state]; + + forward1 = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward1[i] = new double[nb_state]; + } + + state_in = new double*[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + state_in[i] = new double[nb_state]; + } + + backward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward[i] = new double[nb_state]; + } + + backward1 = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward1[i] = new double[nb_state]; + } + + auxiliary = new double[nb_state]; + occupancy_auxiliary = new double[seq.length[index] + 1]; + + if (output == SSTATE) { + backward_output = backward; + } + else { + backward_output = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward_output[i] = new double[nb_state]; + } + } + + transition_predicted = new double[nb_state]; + occupancy_predicted = new double[seq.length[index] + 1]; + + state_entropy = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_entropy[i] = new double[nb_state]; + } + + predicted_entropy = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + predicted_entropy[i] = new double[nb_state]; + } + + transition_entropy = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + transition_entropy[i] = new double[nb_state]; + } + + occupancy_entropy = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + switch (sojourn_type[i]) { + case SEMI_MARKOVIAN : + occupancy = state_process->sojourn_time[i]; + occupancy_entropy[i] = new double[MIN(seq.length[index] , occupancy->nb_value)]; + break; + case MARKOVIAN : + occupancy_entropy[i] = NULL; + break; + } + } + + partial_entropy = new double[seq.length[index]]; + conditional_entropy = new double[seq.length[index]]; + marginal_entropy = new double[seq.length[index]]; + +# ifdef DEBUG 
+ double *backward0; + + backward0 = new double[nb_state]; +# endif + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + + // forward recurrence + + seq_likelihood = 0.; + for (i = 0;i < seq.length[index];i++) { + norm[i] = 0.; + + for (j = 0;j < nb_state;j++) { + + // computation of the observation probabilities + + observation[i][j] = 1.; + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + observation[i][j] *= categorical_process[k]->observation[j]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + observation[i][j] *= discrete_parametric_process[k]->observation[j]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else if (continuous_parametric_process[k]->ident == LINEAR_MODEL) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
i : seq.index_parameter[index][i])); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? i : seq.index_parameter[index][i])); + break; + } + + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[k]->ident == AUTOREGRESSIVE_MODEL) { + if (i == 0) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + case REAL_VALUE : + residual = *proutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(pioutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(proutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + } + } + + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + 
switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if (i == 0) { + state_norm[j] = initial[j]; + } + else { + state_norm[j] += state_in[i - 1][j] - forward1[i - 1][j]; + } + state_norm[j] *= observation[i][j]; + + norm[i] += state_norm[j]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (i == 0) { + forward1[i][j] = initial[j]; + state_entropy[i][j] = 0.; + } + else { + forward1[i][j] = state_in[i - 1][j]; + state_entropy[i][j] = predicted_entropy[i - 1][j]; + } + forward1[i][j] *= observation[i][j]; + + norm[i] += forward1[i][j]; + break; + } + } + } + + if (norm[i] > 0.) { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + case SEMI_MARKOVIAN : + state_norm[j] /= norm[i]; + break; + case MARKOVIAN : + forward1[i][j] /= norm[i]; + break; + } + } + + seq_likelihood += log(norm[i]); + } + + else { + seq_likelihood = D_INF; + break; + } + + for (j = 0;j < nb_state;j++) { + + // case semi-Markovian state + + if (sojourn_type[j] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[j]; + obs_product = 1.; + forward1[i][j] = 0.; + + if (i < seq.length[index] - 1) { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + obs_product *= observation[i - k + 1][j] / norm[i - k + 1]; + if (obs_product == 0.) 
{ + break; + } + + if (k < i + 1) { + occupancy_predicted[k] = obs_product * occupancy->mass[k] * state_in[i - k][j]; +// forward1[i][j] += obs_product * occupancy->mass[k] * state_in[i - k][j]; + } + + else { + switch (type) { + case ORDINARY : + occupancy_predicted[k] = obs_product * occupancy->mass[k] * initial[j]; +// forward1[i][j] += obs_product * occupancy->mass[k] * initial[j]; + break; + case EQUILIBRIUM : + occupancy_predicted[k] = obs_product * forward[j]->mass[k] * initial[j]; +// forward1[i][j] += obs_product * forward[j]->mass[k] * initial[j]; + break; + } + } + + forward1[i][j] += occupancy_predicted[k]; + } + } + + else { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + obs_product *= observation[i - k + 1][j] / norm[i - k + 1]; + if (obs_product == 0.) { + break; + } + + if (k < i + 1) { + occupancy_predicted[k] = obs_product * (1. - occupancy->cumul[k - 1]) * state_in[i - k][j]; +// forward1[i][j] += obs_product * (1. - occupancy->cumul[k - 1]) * state_in[i - k][j]; + } + + else { + switch (type) { + case ORDINARY : + occupancy_predicted[k] = obs_product * (1. - occupancy->cumul[k - 1]) * initial[j]; +// forward1[i][j] += obs_product * (1. - occupancy->cumul[k - 1]) * initial[j]; + break; + case EQUILIBRIUM : + occupancy_predicted[k] = obs_product * (1. - forward[j]->cumul[k - 1]) * initial[j]; +// forward1[i][j] += obs_product * (1. - forward[j]->cumul[k - 1]) * initial[j]; + break; + } + } + + forward1[i][j] += occupancy_predicted[k]; + } + } + + state_entropy[i][j] = 0.; + + if (forward1[i][j] > 0.) { + for (m = 1;m < k;m++) { + buff = occupancy_predicted[m] / forward1[i][j]; + if (buff > 0.) { + if (m < i + 1) { + state_entropy[i][j] += buff * (predicted_entropy[i - m][j] - log(buff)); + } + else { + state_entropy[i][j] -= buff * log(buff); + } + } + } + + if (state_entropy[i][j] < 0.) 
{ + state_entropy[i][j] = 0.; + } + } + } + } + + if (i < seq.length[index] - 1) { + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + transition_predicted[k] = transition[k][j] * forward1[i][k]; + state_in[i][j] += transition_predicted[k]; +// state_in[i][j] += transition[k][j] * forward1[i][k]; + } + + predicted_entropy[i][j] = 0.; + + if (state_in[i][j] > 0.) { + for (k = 0;k < nb_state;k++) { + buff = transition_predicted[k] / state_in[i][j]; + if (buff > 0.) { + predicted_entropy[i][j] += buff * (state_entropy[i][k] - log(buff)); + } + } + + if (predicted_entropy[i][j] < 0.) { + predicted_entropy[i][j] = 0.; + } + } + } + } + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + } + + if (seq_likelihood != D_INF) { + entropy1 = 0.; + i = seq.length[index] - 1; + for (j = 0;j < nb_state;j++) { + if (forward1[i][j] > 0.) { + entropy1 += forward1[i][j] * (state_entropy[i][j] - log(forward1[i][j])); + } + } + + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + for (j = 0;j < seq.length[index];j++) { + state_entropy[j][i] = 0.; + } + } + } + + // backward recurrence + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i]--; + break; + case REAL_VALUE : + proutput[i]--; + break; + } + } + + for (i = 0;i < nb_state;i++) { + for (j = 0;j < nb_state;j++) { + transition_entropy[i][j] = 0.; + } + } + + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[i]; + for (j = occupancy->offset;j < MIN(seq.length[index] , occupancy->nb_value);j++) { + occupancy_entropy[i][j] = 0.; + } + } + } + + entropy2 = 0.; + + i = seq.length[index] - 1; + for (j = 0;j < nb_state;j++) { + backward[i][j] = forward1[i][j]; + backward1[i][j] = backward[i][j]; + + if (output == OUT_STATE) { + 
backward_output[i][j] = backward[i][j]; + } + + if (backward[i][j] > 0.) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + if (categorical_process[k]->observation[j]->mass[*pioutput[k]] > 0.) { + entropy2 -= backward[i][j] * log(categorical_process[k]->observation[j]->mass[*pioutput[k]]); + } + } + + else if (discrete_parametric_process[k]) { + if (discrete_parametric_process[k]->observation[j]->mass[*pioutput[k]] > 0.) { + entropy2 -= backward[i][j] * log(discrete_parametric_process[k]->observation[j]->mass[*pioutput[k]]); + } + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1])); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1])); + break; + } + } + + else if (continuous_parametric_process[k]->ident == LINEAR_MODEL) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? i : seq.index_parameter[index][i])); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
i : seq.index_parameter[index][i])); + break; + } + + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual)); + } + + else if (continuous_parametric_process[k]->ident == AUTOREGRESSIVE_MODEL) { + if (i == 0) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + case REAL_VALUE : + residual = *proutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(pioutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(proutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + } + } + + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual)); + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2)); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2)); + break; + } + } + } + } + } + } + + for (i = seq.length[index] - 2;i >= 0;i--) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]--; + break; + case REAL_VALUE : + proutput[j]--; + 
break; + } + } + + for (j = 0;j < nb_state;j++) { + auxiliary[j] = 0.; + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + obs_product = 1.; + + for (k = 1;k < MIN(seq.length[index] - i , occupancy->nb_value);k++) { + obs_product *= observation[i + k][j] / norm[i + k]; + if (obs_product == 0.) { + break; + } + + occupancy_auxiliary[k] = 0.; + + if (backward1[i + k][j] > 0.) { +// if (forward1[i + k][j] > 0.) { + if (k < seq.length[index] - i - 1) { + buff = backward1[i + k][j] * obs_product * occupancy->mass[k] / + forward1[i + k][j]; + occupancy_auxiliary[k] = buff * state_in[i][j]; + occupancy_entropy[j][k] += occupancy_auxiliary[k]; + +/* if (occupancy->mass[k] > 0.) { + entropy2 -= occupancy_auxiliary[k] * log(occupancy->mass[k]); + } */ + } + + else { + buff = obs_product * (1. - occupancy->cumul[k - 1]); + occupancy_auxiliary[k] = buff * state_in[i][j]; + if (occupancy->cumul[k - 1] < 1.) { + entropy2 -= occupancy_auxiliary[k] * log(1. - occupancy->cumul[k - 1]); + } + } + + auxiliary[j] += buff; + } + } + + sum = 0.; + for (m = k - 1;m >= 1;m--) { + sum += occupancy_auxiliary[m]; + if (backward[i + m][j] > 0.) { + buff = sum / backward[i + m][j]; + if (buff > 0.) { + state_entropy[i + m][j] += buff * (predicted_entropy[i][j] - log(buff)); + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (backward1[i + 1][j] > 0.) { +// if (forward1[i + 1][j] > 0.) { + auxiliary[j] = backward1[i + 1][j] / state_in[i][j]; + +/* auxiliary[j] = backward1[i + 1][j] * observation[i + 1][j] / + (forward1[i + 1][j] * norm[i + 1]); */ + + state_entropy[i + 1][j] = predicted_entropy[i][j]; + } + break; + } + } + } + + for (j = 0;j < nb_state;j++) { + backward1[i][j] = 0.; + + for (k = 0;k < nb_state;k++) { + buff = auxiliary[k] * transition[j][k] * forward1[i][j]; + backward1[i][j] += buff; + transition_entropy[j][k] += buff; + +/* if (transition[j][k] > 0.) 
{ + entropy2 -= buff * log(transition[j][k]); + } */ + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + backward[i][j] = backward[i + 1][j] + backward1[i][j] - auxiliary[j] * state_in[i][j]; + if (backward[i][j] < 0.) { + backward[i][j] = 0.; + } + if (backward[i][j] > 1.) { + backward[i][j] = 1.; + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward[i][j] = backward1[i][j]; + break; + } + } + + if (backward[i][j] > 0.) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + if (categorical_process[k]->observation[j]->mass[*pioutput[k]] > 0.) { + entropy2 -= backward[i][j] * log(categorical_process[k]->observation[j]->mass[*pioutput[k]]); + } + } + + else if (discrete_parametric_process[k]) { + if (discrete_parametric_process[k]->observation[j]->mass[*pioutput[k]] > 0.) { + entropy2 -= backward[i][j] * log(discrete_parametric_process[k]->observation[j]->mass[*pioutput[k]]); + } + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1])); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1])); + break; + } + } + + else if (continuous_parametric_process[k]->ident == LINEAR_MODEL) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
i : seq.index_parameter[index][i])); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? i : seq.index_parameter[index][i])); + break; + } + + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual)); + } + + else if (continuous_parametric_process[k]->ident == AUTOREGRESSIVE_MODEL) { + if (i == 0) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + case REAL_VALUE : + residual = *proutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(pioutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(proutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + } + } + + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual)); + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2)); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + 
seq.min_interval[k + 1] / 2)); + break; + } + } + } + } + } + } + + switch (output) { + + case IN_STATE : { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + backward_output[i + 1][j] = auxiliary[j] * state_in[i][j]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward_output[i + 1][j] = 0.; + for (k = 0;k < nb_state;k++) { + if (k != j) { + backward_output[i + 1][j] += transition[k][j] * forward1[i][k]; + } + } + backward_output[i + 1][j] *= auxiliary[j]; + break; + } + } + } + break; + } + + case OUT_STATE : { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + backward_output[i][j] = backward1[i][j]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward_output[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + if (k != j) { + backward_output[i][j] += auxiliary[k] * transition[j][k]; + } + } + backward_output[i][j] *= forward1[i][j]; + break; + } + } + } + break; + } + } + } + + if (output == IN_STATE) { + for (i = 0;i < nb_state;i++) { + backward_output[0][i] = backward[0][i]; + } + } + + for (i = 0;i < nb_state;i++) { + if (initial[i] > 0.) { + entropy2 -= backward[0][i] * log(initial[i]); + } + } + + for (i = 0;i < nb_state;i++) { + for (j = 0;j < nb_state;j++) { + if (transition[i][j] > 0.) { + entropy2 -= transition_entropy[i][j] * log(transition[i][j]); + } + } + } + + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[i]; + + if (initial[i] > 0.) { + obs_product = 1.; + +# ifdef DEBUG + backward0[i] = 0.; +# endif + + for (j = 1;j < MIN(seq.length[index] + 1 , occupancy->nb_value);j++) { + obs_product *= observation[j - 1][i] / norm[j - 1]; + if (obs_product == 0.) { + break; + } + + occupancy_auxiliary[j] = 0.; + + if (backward1[j - 1][i] > 0.) { +// if (forward1[j - 1][i] > 0.) 
{ + if (j < seq.length[index]) { + switch (type) { + + case ORDINARY : { + occupancy_auxiliary[j] = backward1[j - 1][i] * obs_product * occupancy->mass[j] * + initial[i] / forward1[j - 1][i]; + occupancy_entropy[i][j] += occupancy_auxiliary[j]; + +/* if (occupancy->mass[j] > 0.) { + entropy2 -= occupancy_auxiliary[j] * log(occupancy->mass[j]); + } */ + break; + } + + case EQUILIBRIUM : { + occupancy_auxiliary[j] = backward1[j - 1][i] * obs_product * forward[i]->mass[j] * + initial[i] / forward1[j - 1][i]; + if (forward[i]->mass[j] > 0.) { + entropy2 -= occupancy_auxiliary[j] * log(forward[i]->mass[j]); + } + break; + } + } + } + + else { + switch (type) { + + case ORDINARY : { + occupancy_auxiliary[j] = obs_product * (1. - occupancy->cumul[j - 1]) * initial[i]; + if (occupancy->cumul[j - 1] < 1.) { + entropy2 -= occupancy_auxiliary[j] * log(1. - occupancy->cumul[j - 1]); + } + break; + } + + case EQUILIBRIUM : { + occupancy_auxiliary[j] = obs_product * (1. - forward[i]->cumul[j - 1]) * initial[i]; + if (forward[i]->cumul[j - 1] < 1.) { + entropy2 -= occupancy_auxiliary[j] * log(1. - forward[i]->cumul[j - 1]); + } + break; + } + } + } + +# ifdef DEBUG + backward0[i] += occupancy_auxiliary[j]; +# endif + + } + } + +# ifdef DEBUG + cout << i << " " << backward[0][i] << " " << backward0[i] << endl; +# endif + + sum = 0.; + for (k = j - 1;k >= 1;k--) { + sum += occupancy_auxiliary[k]; + if (backward[k - 1][i] > 0.) { + buff = sum / backward[k - 1][i]; + if (buff > 0.) { + state_entropy[k - 1][i] -= buff * log(buff); + } + } + } + } + + for (j = occupancy->offset;j < MIN(seq.length[index] , occupancy->nb_value);j++) { + if (occupancy->mass[j] > 0.) 
{ + entropy2 -= occupancy_entropy[i][j] * log(occupancy->mass[j]); + } + } + } + } + + entropy2 += seq_likelihood; + +# ifdef MESSAGE + if ((entropy2 < entropy1 - DOUBLE_ERROR) || (entropy2 > entropy1 + DOUBLE_ERROR)) { + cout << "\nERROR: " << entropy1 << " " << entropy2 << endl; + } +# endif + + // restoration + + pstate = seq.int_sequence[index][0]; + + for (i = 0;i < seq.length[index];i++) { + backward_max = 0.; + for (j = 0;j < nb_state;j++) { + if (backward[i][j] > backward_max) { + backward_max = backward[i][j]; + *pstate = j; + } + } + + pstate++; + } + + seq.min_value[0] = 0; + seq.max_value[0] = nb_state - 1; + seq.build_marginal_frequency_distribution(0); + + state_seq_likelihood = SemiMarkov::likelihood_computation(seq , index); + + for (i = 0;i < seq.length[index];i++) { + partial_entropy[i] = 0.; + for (j = 0;j < nb_state;j++) { + if (state_entropy[i][j] < 0.) { + state_entropy[i][j] = 0.; + } + if (backward[i][j] > 0.) { + partial_entropy[i] += backward[i][j] * (state_entropy[i][j] - log(backward[i][j])); + } + } + if (partial_entropy[i] < 0.) { + partial_entropy[i] = 0.; + } + } + + conditional_entropy[0] = partial_entropy[0]; + for (i = 1;i < seq.length[index];i++) { + conditional_entropy[i] = partial_entropy[i] - partial_entropy[i - 1]; + } + + max_marginal_entropy = 0.; + for (i = 0;i < seq.length[index];i++) { + marginal_entropy[i] = 0.; + for (j = 0;j < nb_state;j++) { + if (backward[i][j] > 0.) 
{ + marginal_entropy[i] -= backward[i][j] * log(backward[i][j]); + } + } + if (marginal_entropy[i] > max_marginal_entropy) { + max_marginal_entropy = marginal_entropy[i]; + } + } + + switch (format) { + + case ASCII : { + switch (output) { + case SSTATE : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + break; + case IN_STATE : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_IN_STATE_PROBABILITY] << "\n\n"; + break; + case OUT_STATE : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_OUT_STATE_PROBABILITY] << "\n\n"; + break; + } + +// seq.profile_ascii_print(*os , index , nb_state , backward_output , +// STAT_label[STATL_STATE]); + seq.profile_ascii_print(*os , index , nb_state , backward_output , conditional_entropy , + marginal_entropy , partial_entropy); + + *os << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << seq_likelihood + << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << ": " << state_seq_likelihood + << " (" << exp(state_seq_likelihood - seq_likelihood) << ")" << endl; + break; + } + + case SPREADSHEET : { + switch (output) { + case SSTATE : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + break; + case IN_STATE : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_IN_STATE_PROBABILITY] << "\n\n"; + break; + case OUT_STATE : + *os << "\n" << SEQ_label[SEQL_POSTERIOR_OUT_STATE_PROBABILITY] << "\n\n"; + break; + } + +// seq.profile_spreadsheet_print(*os , index , nb_state , backward_output , +// STAT_label[STATL_STATE]); + seq.profile_spreadsheet_print(*os , index , nb_state , backward_output , conditional_entropy , + marginal_entropy , partial_entropy); + + *os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << seq_likelihood + << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << "\t" << state_seq_likelihood + << "\t" << exp(state_seq_likelihood - seq_likelihood) << endl; + break; + } + + case GNUPLOT : { +// seq.profile_plot_print(*os , index , nb_state , backward_output); + seq.profile_plot_print(*os , index , 
nb_state , backward_output , conditional_entropy , + marginal_entropy , partial_entropy); + break; + } + + case PLOT : { + seq.profile_plotable_write((*plot_set)[1] , index , nb_state , backward_output); + seq.entropy_profile_plotable_write((*plot_set)[2] , index , conditional_entropy , NULL , + marginal_entropy); + seq.entropy_profile_plotable_write((*plot_set)[3] , index , partial_entropy); + break; + } + } + + if (format != GNUPLOT) { +/* double gini_index; + + gini_index = 0.; + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + gini_index += backward[i][j] * (1. - backward[i][j]); + } + } */ + + double entropy3 , nb_state_sequence; + + entropy3 = 0.; + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (backward[i][j] > 0.) { + entropy3 -= backward[i][j] * log(backward[i][j]); + } + } + } + + // computation of the number of state sequences + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + + // forward recurrence + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + + // computation of the indicator functions of the observation probabilities + + if (observation[i][j] > 0.) { + observation[i][j] = 1.; + } + + forward1[i][j] = 0.; + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + + if (i < seq.length[index] - 1) { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (observation[i - k + 1][j] == 0.) { + break; + } + + if (k < i + 1) { + if (occupancy->mass[k] > 0.) { + forward1[i][j] += state_in[i - k][j]; + } + } + + else { + if (initial[j] > 0.) { + switch (type) { + + case ORDINARY : { + if (occupancy->mass[k] > 0.) 
{ + forward1[i][j]++; + } + break; + } + + case EQUILIBRIUM : { + if (forward[j]->mass[k] > 0.) { + forward1[i][j]++; + } + break; + } + } + } + } + } + } + + else { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (observation[i - k + 1][j] == 0.) { + break; + } + + if (k < i + 1) { + if (1. - occupancy->cumul[k - 1] > 0.) { + forward1[i][j] += state_in[i - k][j]; + } + } + + else { + if (initial[j] > 0.) { + switch (type) { + + case ORDINARY : { + if (1. - occupancy->cumul[k - 1] > 0.) { + forward1[i][j]++; + } + break; + } + + case EQUILIBRIUM : { + if (1. - forward[j]->cumul[k - 1] > 0.) { + forward1[i][j]++; + } + break; + } + } + } + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (observation[i][j] == 1.) { + if (i == 0) { + if (initial[j] > 0.) { + forward1[i][j] = 1.; + } + } + else { + forward1[i][j] = state_in[i - 1][j]; + } + } + break; + } + } + } + + if (i < seq.length[index] - 1) { + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + if (transition[k][j] > 0.) { + state_in[i][j] += forward1[i][k]; + } + } + } + } + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + } + + nb_state_sequence = 0.; + i = seq.length[index] - 1; + for (j = 0;j < nb_state;j++) { + nb_state_sequence += forward1[i][j]; + } + + switch (format) { + case ASCII : +/* *os << "\n" << SEQ_label[SEQL_GINI_INDEX] << ": " << gini_index << " (" + << gini_index / seq.length[index] << ") " << SEQ_label[SEQL_UPPER_BOUND] << ": " + << seq.length[index] * (1. - 1. / nb_state) << " (" << 1. - 1. 
/ nb_state + *os << ") " << SEQ_label[SEQL_UPPER_BOUND] << ": " + << seq.length[index] * log((double)nb_state) << " (" << log((double)nb_state) */ + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << entropy1 + << " (" << entropy1 / seq.length[index] << ") " << SEQ_label[SEQL_UPPER_BOUND] << ": " + << log((double)nb_state_sequence) << " (" + << log((double)nb_state_sequence) / seq.length[index] + << ")\n" << SEQ_label[SEQL_MARGINAL_ENTROPY_SUM] << ": " << entropy3 << " (" + << entropy3 / seq.length[index] << ")\n\n" + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << ": " << nb_state_sequence << endl; + break; + case SPREADSHEET : +/* *os << "\n" << SEQ_label[SEQL_GINI_INDEX] << "\t" << gini_index << "\t" + << gini_index / seq.length[index] << "\t" << SEQ_label[SEQL_UPPER_BOUND] << "\t" + << seq.length[index] * (1. - 1. / nb_state) << "\t" << 1. - 1. / nb_state + *os << "\t" << SEQ_label[SEQL_UPPER_BOUND] << "\t" + << seq.length[index] * log((double)nb_state) << "\t" << log((double)nb_state) */ + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << "\t" << entropy1 + << "\t" << entropy1 / seq.length[index] << "\t" << SEQ_label[SEQL_UPPER_BOUND] << "\t" + << log((double)nb_state_sequence) << "\t" + << log((double)nb_state_sequence) / seq.length[index] + << "\n" << SEQ_label[SEQL_MARGINAL_ENTROPY_SUM] << "\t" << entropy3 << "\t" + << entropy3 / seq.length[index] << "\n\n" + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << "\t" << nb_state_sequence << endl; + break; + } + +# ifdef DEBUG + int state; + double min_nb_state_sequence , smoothed_proba , cumul_smoothed_proba , + max_smoothed_proba , **backward2; + + // backward recurrence + + min_nb_state_sequence = nb_state_sequence; + + backward2 = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward2[i] = new double[nb_state]; + } + + i = seq.length[index] - 1; + for (j = 0;j < nb_state;j++) { + backward2[i][j] = forward1[i][j]; + backward1[i][j] = 1.; + } + + for (i = seq.length[index] - 
2;i >= 0;i--) { + for (j = 0;j < nb_state;j++) { + auxiliary[j] = 0.; + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + + for (k = 1;k < MIN(seq.length[index] - i , occupancy->nb_value);k++) { + if (observation[i + k][j] == 0.) { + break; + } + + if (k < seq.length[index] - i - 1) { + if (occupancy->mass[k] > 0.) { + auxiliary[j] += backward1[i + k][j]; + } + } + else { + if (1. - occupancy->cumul[k - 1] > 0.) { + auxiliary[j]++; + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (observation[i + 1][j] == 1.) { + auxiliary[j] = backward1[i + 1][j]; + } + break; + } + } + } + + for (j = 0;j < nb_state;j++) { + backward1[i][j] = 0.; + + for (k = 0;k < nb_state;k++) { + if (transition[j][k] > 0.) { + backward1[i][j] += auxiliary[k]; + } + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + +# ifdef DEBUG + if ((i == 0) && (initial[j] > 0.)) { + occupancy = state_process->sojourn_time[j]; + backward0[j] = 0.; + + for (k = 1;k < MIN(seq.length[index] + 1 , occupancy->nb_value);k++) { + if (observation[k - 1][j] == 0.) { + break; + } + + if (k < seq.length[index]) { + if (occupancy->mass[k] > 0.) { + backward0[j] += backward1[k - 1][j]; + } + } + else { + if (1. - occupancy->cumul[k - 1] > 0.) 
{ + backward0[j]++; + } + } + } + } +# endif + + backward2[i][j] = backward2[i + 1][j] + backward1[i][j] * forward1[i][j] - + auxiliary[j] * state_in[i][j]; + +# ifdef DEBUG + if ((i == 0) && (initial[j] > 0.)) { + cout << j << " " << backward2[i][j] << " " << backward0[j] << endl; + } +# endif + + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward2[i][j] = backward1[i][j] * forward1[i][j]; + break; + } + } + } + + smoothed_proba = 1.1; + cumul_smoothed_proba = 0.; + nb_state_sequence = 0; + + for (j = 0;j < nb_state;j++) { + max_smoothed_proba = 0.; + for (k = 0;k < nb_state;k++) { + if ((backward[i][k] > max_smoothed_proba) && (backward[i][k] < smoothed_proba)) { + max_smoothed_proba = backward[i][k]; + state = k; + } + } + cumul_smoothed_proba += max_smoothed_proba; + nb_state_sequence += backward2[i][state]; + + if (cumul_smoothed_proba < 1. - MIN_SMOOTHED_PROBABILITY) { + smoothed_proba = max_smoothed_proba; + } + else { + break; + } + } + + if (nb_state_sequence < min_nb_state_sequence) { + min_nb_state_sequence = nb_state_sequence; + } + } + + cout << SEQ_label[SEQL_NB_STATE_SEQUENCE] + << " (" << 1. 
- MIN_SMOOTHED_PROBABILITY << " beam)" + << ": " << min_nb_state_sequence << endl; + + cout << "\n"; + for (i = 0;i < seq.length[index];i++) { + obs_product = 0.; + for (j = 0;j < nb_state;j++) { + cout << backward2[i][j] << " (" << backward[i][j] << ") "; + obs_product += backward2[i][j]; + } + cout << "| " << obs_product << endl; + } + + for (i = 0;i < seq.length[index];i++) { + delete [] backward2[i]; + } + delete [] backward2; +# endif + + } + } + + for (i = 0;i < seq.length[index];i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] norm; + delete [] state_norm; + + for (i = 0;i < seq.length[index];i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < seq.length[index] - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + for (i = 0;i < seq.length[index];i++) { + delete [] backward[i]; + } + delete [] backward; + + for (i = 0;i < seq.length[index];i++) { + delete [] backward1[i]; + } + delete [] backward1; + + delete [] auxiliary; + delete [] occupancy_auxiliary; + + if (output != SSTATE) { + for (i = 0;i < seq.length[index];i++) { + delete [] backward_output[i]; + } + delete [] backward_output; + } + + delete [] transition_predicted; + delete [] occupancy_predicted; + + for (i = 0;i < seq.length[index];i++) { + delete [] state_entropy[i]; + } + delete [] state_entropy; + + for (i = 0;i < seq.length[index];i++) { + delete [] predicted_entropy[i]; + } + delete [] predicted_entropy; + + for (i = 0;i < nb_state;i++) { + delete [] transition_entropy[i]; + } + delete [] transition_entropy; + + for (i = 0;i < nb_state;i++) { + delete [] occupancy_entropy[i]; + } + delete [] occupancy_entropy; + + delete [] partial_entropy; + delete [] conditional_entropy; + delete [] marginal_entropy; + +# ifdef DEBUG + delete [] backward0; +# endif + + delete [] pioutput; + delete [] proutput; + + return seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation of 
state sequences for an observed sequence using + * the forward-backward algorithm for sampling. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] nb_state_sequence number of state sequences. + * + * \return log-likelihood for the observed sequence. + */ +/*--------------------------------------------------------------*/ + +double HiddenSemiMarkov::forward_backward_sampling(const MarkovianSequences &seq , int index , + ostream &os , output_format format , + int nb_state_sequence) const + +{ + int i , j , k; + int state_occupancy , *pstate , **pioutput; + double seq_likelihood , state_seq_likelihood , obs_product , residual , **observation , *norm , + *state_norm , **forward1 , **state_in , *backward , *cumul_backward , **proutput; + DiscreteParametric *occupancy; + +# ifdef DEBUG + int m; + double sum; +# endif + + + // initializations + + observation = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + observation[i] = new double[nb_state]; + } + + norm = new double[seq.length[index]]; + state_norm = new double[nb_state]; + + forward1 = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward1[i] = new double[nb_state]; + } + + state_in = new double*[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + state_in[i] = new double[nb_state]; + } + + backward = new double[seq.length[index] + 1]; + cumul_backward = new double[seq.length[index] + 1]; + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + +# ifdef DEBUG + double **state_sequence_probability; + + + state_sequence_probability = 
new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_sequence_probability[i] = new double[nb_state]; + for (j = 0;j < nb_state;j++) { + state_sequence_probability[i][j] = 0.; + } + } +# endif + + // forward recurrence + + seq_likelihood = 0.; + for (i = 0;i < seq.length[index];i++) { + norm[i] = 0.; + + for (j = 0;j < nb_state;j++) { + + // computation of the observation probabilities + + observation[i][j] = 1.; + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + observation[i][j] *= categorical_process[k]->observation[j]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + observation[i][j] *= discrete_parametric_process[k]->observation[j]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else if (continuous_parametric_process[k]->ident == LINEAR_MODEL) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? i : seq.index_parameter[index][i])); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
i : seq.index_parameter[index][i])); + break; + } + + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[k]->ident == AUTOREGRESSIVE_MODEL) { + if (i == 0) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + case REAL_VALUE : + residual = *proutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(pioutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(proutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + } + } + + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + observation[i][j] *= continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if (i == 0) { + state_norm[j] = initial[j]; + } + else { + state_norm[j] += state_in[i - 1][j] - forward1[i - 1][j]; + } + state_norm[j] *= observation[i][j]; + + norm[i] += 
state_norm[j]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (i == 0) { + forward1[i][j] = initial[j]; + } + else { + forward1[i][j] = state_in[i - 1][j]; + } + forward1[i][j] *= observation[i][j]; + + norm[i] += forward1[i][j]; + break; + } + } + } + + if (norm[i] > 0.) { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + case SEMI_MARKOVIAN : + state_norm[j] /= norm[i]; + break; + case MARKOVIAN : + forward1[i][j] /= norm[i]; + break; + } + } + + seq_likelihood += log(norm[i]); + } + + else { + seq_likelihood = D_INF; + break; + } + + for (j = 0;j < nb_state;j++) { + + // case semi-Markovian state + + if (sojourn_type[j] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[j]; + obs_product = 1.; + forward1[i][j] = 0.; + + if (i < seq.length[index] - 1) { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + obs_product *= observation[i - k + 1][j] / norm[i - k + 1]; + if (obs_product == 0.) { + break; + } + + if (k < i + 1) { + forward1[i][j] += obs_product * occupancy->mass[k] * state_in[i - k][j]; + } + + else { + switch (type) { + case ORDINARY : + forward1[i][j] += obs_product * occupancy->mass[k] * initial[j]; + break; + case EQUILIBRIUM : + forward1[i][j] += obs_product * forward[j]->mass[k] * initial[j]; + break; + } + } + } + } + + else { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + obs_product *= observation[i - k + 1][j] / norm[i - k + 1]; + if (obs_product == 0.) { + break; + } + + if (k < i + 1) { + forward1[i][j] += obs_product * (1. - occupancy->cumul[k - 1]) * state_in[i - k][j]; + } + + else { + switch (type) { + case ORDINARY : + forward1[i][j] += obs_product * (1. - occupancy->cumul[k - 1]) * initial[j]; + break; + case EQUILIBRIUM : + forward1[i][j] += obs_product * (1. 
- forward[j]->cumul[k - 1]) * initial[j]; + break; + } + } + } + } + } + } + + if (i < seq.length[index] - 1) { + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + state_in[i][j] += transition[k][j] * forward1[i][k]; + } + } + } + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + } + + if (seq_likelihood != D_INF) { + + // backward passes + +# ifdef MESSAGE + cout << "\n"; +# endif + + for (i = 0;i < nb_state_sequence;i++) { + j = seq.length[index] - 1; + pstate = seq.int_sequence[index][0] + j; + stat_tool::cumul_computation(nb_state , forward1[j] , cumul_backward); + *pstate = cumul_method(nb_state , cumul_backward); + + do { + + // case semi-Markovian state + + if (sojourn_type[*pstate] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[*pstate]; + obs_product = 1.; + + if (j < seq.length[index] - 1) { + for (k = 1;k <= MIN(j + 1 , occupancy->nb_value - 1);k++) { + obs_product *= observation[j - k + 1][*pstate] / norm[j - k + 1]; + if (obs_product == 0.) { + break; + } + + if (k < j + 1) { + backward[k] = obs_product * occupancy->mass[k] * state_in[j - k][*pstate] / + forward1[j][*pstate]; + } + + else { + switch (type) { + case ORDINARY : + backward[k] = obs_product * occupancy->mass[k] * initial[*pstate] / + forward1[j][*pstate]; + break; + case EQUILIBRIUM : + backward[k] = obs_product * forward[*pstate]->mass[k] * initial[*pstate] / + forward1[j][*pstate]; + break; + } + } + } + } + + else { + for (k = 1;k <= MIN(j + 1 , occupancy->nb_value - 1);k++) { + obs_product *= observation[j - k + 1][*pstate] / norm[j - k + 1]; + if (obs_product == 0.) { + break; + } + + if (k < j + 1) { + backward[k] = obs_product * (1. - occupancy->cumul[k - 1]) * state_in[j - k][*pstate] / + forward1[j][*pstate]; + } + + else { + switch (type) { + case ORDINARY : + backward[k] = obs_product * (1. 
- occupancy->cumul[k - 1]) * initial[*pstate] / + forward1[j][*pstate]; + break; + case EQUILIBRIUM : + backward[k] = obs_product * (1. - forward[*pstate]->cumul[k - 1]) * initial[*pstate] / + forward1[j][*pstate]; + break; + } + } + } + } + + stat_tool::cumul_computation(k - 1 , backward + 1 , cumul_backward); + state_occupancy = 1 + cumul_method(k - 1 , cumul_backward); + +# ifdef DEBUG + sum = 0.; + for (m = 1;m < k;m++) { + sum += backward[m]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. + DOUBLE_ERROR)) { + cout << "\nERROR: " << j << " " << sum << endl; + } +# endif + + for (k = 1;k < state_occupancy;k++) { + pstate--; + *pstate = *(pstate + 1); + } + j -= (state_occupancy - 1); + + if (j == 0) { + break; + } + } + + j--; + for (k = 0;k < nb_state;k++) { + backward[k] = transition[k][*pstate] * forward1[j][k] / state_in[j][*pstate]; + } + stat_tool::cumul_computation(nb_state , backward , cumul_backward); + *--pstate = cumul_method(nb_state , cumul_backward); + +# ifdef DEBUG + sum = 0.; + for (k = 0;k < nb_state;k++) { + sum += backward[k]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << j << " " << sum << endl; + } +# endif + + } + while (j > 0); + +# ifdef DEBUG + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { + state_sequence_probability[j][*pstate++]++; + } +# endif + +# ifdef MESSAGE + state_seq_likelihood = SemiMarkov::likelihood_computation(seq , index); + + pstate = seq.int_sequence[index][0]; + + switch (format) { + + case ASCII : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << " "; + } + + os << " " << i + 1 << " " << state_seq_likelihood + << " (" << exp(state_seq_likelihood - seq_likelihood) << ")" << endl; + break; + } + + case SPREADSHEET : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << "\t"; + } + + os << "\t" << i + 1 << "\t" << state_seq_likelihood + << "\t" << exp(state_seq_likelihood - seq_likelihood) << endl; + break; + } + } +# endif + + } + +# ifdef DEBUG + if (nb_state_sequence >= 1000) { + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + state_sequence_probability[i][j] /= nb_state_sequence; + } + } + + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { + *pstate++ = I_DEFAULT; + } + + os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + seq.profile_ascii_print(os , index , nb_state , state_sequence_probability , + STAT_label[STATL_STATE]); + } +# endif + + } + + for (i = 0;i < seq.length[index];i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] norm; + delete [] state_norm; + + for (i = 0;i < seq.length[index];i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < seq.length[index] - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] backward; + delete [] cumul_backward; + + delete [] pioutput; + delete [] proutput; + +# ifdef DEBUG + for (i = 0;i < seq.length[index];i++) { + delete [] state_sequence_probability[i]; + } + delete [] state_sequence_probability; +# endif + + 
return seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-parameters of a hidden semi-Markov chain. + */ +/*--------------------------------------------------------------*/ + +void HiddenSemiMarkov::log_computation() + +{ + int i , j; + double *pcumul; + DiscreteParametric *occupancy; + + + Chain::log_computation(); + + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + occupancy = state_process->sojourn_time[i]; + + if (occupancy->mass[occupancy->offset] > 0.) { + stat_tool::log_computation(occupancy->nb_value , occupancy->mass , occupancy->mass); + + pcumul = occupancy->cumul; + for (j = 0;j < occupancy->nb_value;j++) { + *pcumul = 1. - *pcumul; + pcumul++; + } + stat_tool::log_computation(occupancy->nb_value , occupancy->cumul , occupancy->cumul); + + if (type == EQUILIBRIUM) { + stat_tool::log_computation(forward[i]->nb_value , forward[i]->mass , forward[i]->mass); + + pcumul = forward[i]->cumul; + for (j = 0;j < forward[i]->nb_value;j++) { + *pcumul = 1. - *pcumul; + pcumul++; + } + stat_tool::log_computation(forward[i]->nb_value , forward[i]->cumul , forward[i]->cumul); + } + } + } + } + + for (i = 0;i < nb_output_process;i++) { + if (categorical_process[i]) { + for (j = 0;j < nb_state;j++) { + categorical_process[i]->observation[j]->log_computation(); + } + } + + else if (discrete_parametric_process[i]) { + for (j = 0;j < nb_state;j++) { + stat_tool::log_computation(discrete_parametric_process[i]->nb_value , + discrete_parametric_process[i]->observation[j]->mass , + discrete_parametric_process[i]->observation[j]->cumul); + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the most probable state sequences using the Viterbi algorithm. 
+ * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] posterior_probability pointer on the posterior probabilities of the most probable state sequences, + * \param[in] index sequence index. + * + * \return log-likelihood for the most probable state sequences. + */ +/*--------------------------------------------------------------*/ + +double HiddenSemiMarkov::viterbi(const MarkovianSequences &seq , + double *posterior_probability , int index) const + +{ + int i , j , k , m; + int length , *pstate , **pioutput , **input_state , **optimal_state , + **optimal_occupancy; + double likelihood = 0. , obs_product , buff , residual , forward_max , **observation , + *forward1 , **state_in , **proutput; + DiscreteParametric *occupancy; + + + // initializations + + length = (index == I_DEFAULT ? seq.max_length : seq.length[index]); + + observation = new double*[length]; + for (i = 0;i < length;i++) { + observation[i] = new double[nb_state]; + } + + forward1 = new double[nb_state]; + + state_in = new double*[length - 1]; + for (i = 0;i < length - 1;i++) { + state_in[i] = new double[nb_state]; + } + + input_state = new int*[length - 1]; + for (i = 0;i < length - 1;i++) { + input_state[i] = new int[nb_state]; + } + + optimal_state = new int*[length]; + for (i = 0;i < length;i++) { + optimal_state[i] = new int[nb_state]; + } + + optimal_occupancy = new int*[length]; + for (i = 0;i < length;i++) { + optimal_occupancy[i] = new int[nb_state]; + } + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < seq.nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + + // forward recurrence + + for (j = 0;j < seq.length[i];j++) { + for (k = 0;k < nb_state;k++) { + + // computation of 
the observation probabilities + + observation[j][k] = 0.; + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + buff = categorical_process[m]->observation[k]->cumul[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + buff = discrete_parametric_process[m]->observation[k]->cumul[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else if (continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? j : seq.index_parameter[i][j])); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->intercept + + continuous_parametric_process[m]->observation[k]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
j : seq.index_parameter[i][j])); + break; + } + + buff = continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (j == 0) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - continuous_parametric_process[m]->observation[k]->location; + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(pioutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[k]->location + + continuous_parametric_process[m]->observation[k]->autoregressive_coeff * + (*(proutput[m] - 1) - continuous_parametric_process[m]->observation[k]->location)); + break; + } + } + + buff = continuous_parametric_process[m]->observation[k]->mass_computation(residual , residual); + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[m]->observation[k]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[m]->observation[k]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + + if (buff > 0.) 
{ + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + observation[j][k] = D_INF; + break; + } + else { + observation[j][k] += buff; + } + } + + switch (sojourn_type[k]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[k]; + obs_product = 0.; + forward1[k] = D_INF; + + if (j < seq.length[i] - 1) { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + if (observation[j - m + 1][k] == D_INF) { + break; + } + else { + obs_product += observation[j - m + 1][k]; + } + + if (m < j + 1) { + buff = obs_product + occupancy->mass[m] + state_in[j - m][k]; + } + + else { + switch (type) { + case ORDINARY : + buff = obs_product + occupancy->mass[m] + cumul_initial[k]; + break; + case EQUILIBRIUM : + buff = obs_product + forward[k]->mass[m] + cumul_initial[k]; + break; + } + } + + if (buff > forward1[k]) { + forward1[k] = buff; + if (m < j + 1) { + optimal_state[j][k] = input_state[j - m][k]; + } + optimal_occupancy[j][k] = m; + } + } + } + + else { + for (m = 1;m <= MIN(j + 1 , occupancy->nb_value - 1);m++) { + if (observation[j - m + 1][k] == D_INF) { + break; + } + else { + obs_product += observation[j - m + 1][k]; + } + + if (m < j + 1) { + buff = obs_product + occupancy->cumul[m - 1] + state_in[j - m][k]; + } + + else { + switch (type) { + case ORDINARY : + buff = obs_product + occupancy->cumul[m - 1] + cumul_initial[k]; + break; + case EQUILIBRIUM : + buff = obs_product + forward[k]->cumul[m - 1] + cumul_initial[k]; + break; + } + } + + if (buff > forward1[k]) { + forward1[k] = buff; + if (m < j + 1) { + optimal_state[j][k] = input_state[j - m][k]; + } + optimal_occupancy[j][k] = m; + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (j == 0) { + forward1[k] = cumul_initial[k]; + } + else { + forward1[k] = state_in[j - 1][k]; + optimal_state[j][k] = input_state[j - 1][k]; + } + optimal_occupancy[j][k] = 1; + + if (forward1[k] != D_INF) { + if 
(observation[j][k] == D_INF) { + forward1[k] = D_INF; + } + else { + forward1[k] += observation[j][k]; + } + } + break; + } + } + } + +# ifdef DEBUG + cout << j << " : "; + for (k = 0;k < nb_state;k++) { + cout << forward1[k]; + if (forward1[k] != D_INF) { + cout << " " << optimal_occupancy[j][k] << " " << optimal_state[j][k]; + } + cout << " | "; + } + cout << endl; +# endif + + if (j < seq.length[i] - 1) { + for (k = 0;k < nb_state;k++) { + state_in[j][k] = D_INF; + for (m = 0;m < nb_state;m++) { + buff = cumul_transition[m][k] + forward1[m]; + if (buff > state_in[j][k]) { + state_in[j][k] = buff; + input_state[j][k] = m; + } + } + } + } + + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + } + + // extraction of the log-likelihood for the most probable state sequence + + pstate = seq.int_sequence[i][0] + seq.length[i] - 1; + forward_max = D_INF; + + for (j = 0;j < nb_state;j++) { + if (forward1[j] > forward_max) { + forward_max = forward1[j]; + *pstate = j; + } + } + + if (forward_max != D_INF) { + likelihood += forward_max; + if (posterior_probability) { + posterior_probability[i] = forward_max; + } + } + + else { + likelihood = D_INF; + if (posterior_probability) { + posterior_probability[i] = 0.; + } + break; + } + + // restoration of the most probable state sequence + + j = seq.length[i] - 1; + + do { + for (k = 0;k < optimal_occupancy[j][*pstate] - 1;k++) { + pstate--; + *pstate = *(pstate + 1); + } + + if (j >= optimal_occupancy[j][*pstate]) { + pstate--; + *pstate = optimal_state[j][*(pstate + 1)]; + j -= optimal_occupancy[j][*(pstate + 1)]; + } + else { + j -= optimal_occupancy[j][*pstate]; + } + } + while (j >= 0); + } + } + + for (i = 0;i < length;i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] forward1; + + for (i = 0;i < length - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + for (i = 
0;i < length - 1;i++) { + delete [] input_state[i]; + } + delete [] input_state; + + for (i = 0;i < length;i++) { + delete [] optimal_state[i]; + } + delete [] optimal_state; + + for (i = 0;i < length;i++) { + delete [] optimal_occupancy[i]; + } + delete [] optimal_occupancy; + + delete [] pioutput; + delete [] proutput; + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the most probable state sequences using the Viterbi algorithm. + * + * \param[in] seq reference on a SemiMarkovData object. + */ +/*--------------------------------------------------------------*/ + +void HiddenSemiMarkov::viterbi(SemiMarkovData &seq) const + +{ + seq.posterior_probability = new double[seq.nb_sequence]; + seq.restoration_likelihood = viterbi(seq , seq.posterior_probability); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the N most probable state sequences for + * an observed sequence using the generalized Viterbi algorithm. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] seq_likelihood log-likelihood for the observed sequence, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] inb_state_sequence number of state sequences. + * + * \return log-likelihood for the most probable state sequence. 
+ */ +/*--------------------------------------------------------------*/ + +double HiddenSemiMarkov::generalized_viterbi(const MarkovianSequences &seq , int index , + ostream &os , double seq_likelihood , + output_format format , int inb_state_sequence) const + +{ + bool **active_cell; + int i , j , k , m; + int nb_state_sequence , max_occupancy , brank , previous_rank , nb_cell , *rank , + *pstate , **pioutput , ***input_state , ***optimal_state , ***optimal_occupancy , + ***input_rank , ***optimal_rank; + double buff , residual , forward_max , state_seq_likelihood , likelihood_cumul , + *obs_product , **observation , **forward1 , **proutput , ***state_in; + DiscreteParametric *occupancy; + + + // initializations + + observation = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + observation[i] = new double[nb_state]; + } + + obs_product = new double[seq.length[index] + 1]; + + forward1 = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + forward1[i] = new double[inb_state_sequence]; + } + + state_in = new double**[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + state_in[i] = new double*[nb_state]; + for (j = 0;j < nb_state;j++) { + state_in[i][j] = new double[inb_state_sequence]; + } + } + + rank = new int[MAX(seq.length[index] + 1 , nb_state)]; + + input_state = new int**[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + input_state[i] = new int*[nb_state]; + for (j = 0;j < nb_state;j++) { + input_state[i][j] = new int[inb_state_sequence]; + } + } + + optimal_state = new int**[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + optimal_state[i] = new int*[nb_state]; + for (j = 0;j < nb_state;j++) { + optimal_state[i][j] = new int[inb_state_sequence]; + } + } + + optimal_occupancy = new int**[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + optimal_occupancy[i] = new int*[nb_state]; + for (j = 0;j < nb_state;j++) { + optimal_occupancy[i][j] = new 
int[inb_state_sequence]; + } + } + + input_rank = new int**[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + input_rank[i] = new int*[nb_state]; + for (j = 0;j < nb_state;j++) { + input_rank[i][j] = new int[inb_state_sequence]; + } + } + + optimal_rank = new int**[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + optimal_rank[i] = new int*[nb_state]; + for (j = 0;j < nb_state;j++) { + optimal_rank[i][j] = new int[inb_state_sequence]; + } + } + + active_cell = new bool*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + active_cell[i] = new bool[nb_state]; + for (j = 0;j < nb_state;j++) { + active_cell[i][j] = false; + } + } + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + nb_state_sequence = 1; + +# ifdef DEBUG + double entropy = 0. 
, **state_sequence_probability; + + + state_sequence_probability = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_sequence_probability[i] = new double[nb_state]; + for (j = 0;j < nb_state;j++) { +// state_sequence_probability[i][j] = 0.; + state_sequence_probability[i][j] = D_INF; + } + } +# endif + + // forward recurrence + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + + // computation of the observation probabilities + + observation[i][j] = 0.; + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + buff = categorical_process[k]->observation[j]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + buff = discrete_parametric_process[k]->observation[j]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else if (continuous_parametric_process[k]->ident == LINEAR_MODEL) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? i : seq.index_parameter[index][i])); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
i : seq.index_parameter[index][i])); + break; + } + + buff = continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[k]->ident == AUTOREGRESSIVE_MODEL) { + if (i == 0) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + case REAL_VALUE : + residual = *proutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(pioutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(proutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + } + } + + buff = continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) 
{ + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + observation[i][j] = D_INF; + break; + } + else { + observation[i][j] += buff; + } + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + + obs_product[0] = 0.; + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (observation[i - k + 1][j] == D_INF) { + break; + } + else { + obs_product[k] = obs_product[k - 1] + observation[i - k + 1][j]; + } + } + max_occupancy = k - 1; + + for (k = 1;k <= max_occupancy;k++) { + rank[k] = 0; + } + + for (k = 0;k < nb_state_sequence;k++) { + forward1[j][k] = D_INF; + + if (i < seq.length[index] - 1) { + for (m = 1;m <= max_occupancy;m++) { + if (m < i + 1) { + buff = obs_product[m] + occupancy->mass[m] + state_in[i - m][j][rank[m]]; + } + + else { + if (rank[i + 1] == 0) { + switch (type) { + case ORDINARY : + buff = obs_product[m] + occupancy->mass[m] + cumul_initial[j]; + break; + case EQUILIBRIUM : + buff = obs_product[m] + forward[j]->mass[m] + cumul_initial[j]; + break; + } + } + + else { + buff = D_INF; + } + } + + if (buff > forward1[j][k]) { + forward1[j][k] = buff; + if (m < i + 1) { + optimal_state[i][j][k] = input_state[i - m][j][rank[m]]; + optimal_rank[i][j][k] = input_rank[i - m][j][rank[m]]; + } + optimal_occupancy[i][j][k] = m; + } + } + } + + else { + for (m = 1;m <= max_occupancy;m++) { + if (m < i + 1) { + buff = obs_product[m] + occupancy->cumul[m - 1] + state_in[i - m][j][rank[m]]; + } + + else { + if (rank[i + 1] == 0) { + switch (type) { + case ORDINARY : + buff = obs_product[m] + occupancy->cumul[m - 1] + cumul_initial[j]; + break; + case EQUILIBRIUM : + buff = obs_product[m] + forward[j]->cumul[m - 1] + cumul_initial[j]; + break; + } + } + + else { + buff = D_INF; + } + } + + if (buff > forward1[j][k]) { + forward1[j][k] = buff; + if (m < i + 1) { + optimal_state[i][j][k] = input_state[i - m][j][rank[m]]; + 
optimal_rank[i][j][k] = input_rank[i - m][j][rank[m]]; + } + optimal_occupancy[i][j][k] = m; + } + } + } + + if (forward1[j][k] != D_INF) { + rank[optimal_occupancy[i][j][k]]++; + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + for (k = 0;k < nb_state_sequence;k++) { + if (i == 0) { + forward1[j][k] = cumul_initial[j]; + } + else { + forward1[j][k] = state_in[i - 1][j][k]; + optimal_state[i][j][k] = input_state[i - 1][j][k]; + optimal_rank[i][j][k] = input_rank[i - 1][j][k]; + } + optimal_occupancy[i][j][k] = 1; + + if (forward1[j][k] != D_INF) { + if (observation[i][j] == D_INF) { + forward1[j][k] = D_INF; + } + else { + forward1[j][k] += observation[i][j]; + } + } + } + break; + } + } + + for (k = nb_state_sequence;k < inb_state_sequence;k++) { + forward1[j][k] = D_INF; + } + } + +# ifdef DEBUG + cout << i << " : "; + for (j = 0;j < nb_state;j++) { + cout << j << " :"; + for (k = 0;k < nb_state_sequence;k++) { + cout << " " << forward1[j][k]; + if (forward1[j][k] != D_INF) { + cout << " " << optimal_occupancy[i][j][k]; + if (optimal_occupancy[i][j][k] < i + 1) { + cout << " " << optimal_state[i][j][k] << " " << optimal_rank[i][j][k]; + } + } + cout << " |"; + } + cout << "| "; + } + cout << endl; +# endif + + if (i < seq.length[index] - 1) { + if (nb_state_sequence < inb_state_sequence) { + if (nb_state_sequence * nb_state < inb_state_sequence) { + nb_state_sequence *= nb_state; + } + else { + nb_state_sequence = inb_state_sequence; + } + } + + for (j = 0;j < nb_state;j++) { + for (k = 0;k < nb_state;k++) { + rank[k] = 0; + } + + for (k = 0;k < nb_state_sequence;k++) { + state_in[i][j][k] = D_INF; + for (m = 0;m < nb_state;m++) { + buff = cumul_transition[m][j] + forward1[m][rank[m]]; + if (buff > state_in[i][j][k]) { + state_in[i][j][k] = buff; + input_state[i][j][k] = m; + input_rank[i][j][k] = rank[m]; + } + } + + if (state_in[i][j][k] != D_INF) { + rank[input_state[i][j][k]]++; + } + } + + for (k = nb_state_sequence;k < 
inb_state_sequence;k++) { + state_in[i][j][k] = D_INF; + } + } + +# ifdef DEBUG + cout << i << " : "; + for (j = 0;j < nb_state;j++) { + cout << j << " :"; + for (k = 0;k < nb_state_sequence;k++) { + cout << " " << state_in[i][j][k]; + if (state_in[i][j][k] != D_INF) { + cout << " " << input_state[i][j][k] << " " << input_rank[i][j][k]; + } + cout << " |"; + } + cout << "| "; + } + cout << endl; +# endif + + } + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + } + + // extraction of the log-likelihood for the most probable state sequence + + for (i = 0;i < nb_state;i++) { + rank[i] = 0; + } + likelihood_cumul = 0.; + + for (i = 0;i < nb_state_sequence;i++) { + pstate = seq.int_sequence[index][0] + seq.length[index] - 1; + forward_max = D_INF; + + for (j = 0;j < nb_state;j++) { + if (forward1[j][rank[j]] > forward_max) { + forward_max = forward1[j][rank[j]]; + *pstate = j; + } + } + + if (i == 0) { + state_seq_likelihood = forward_max; + } + + if (forward_max == D_INF) { + break; + } + + // restoration of the most probable state sequence + + brank = rank[*pstate]; + rank[*pstate]++; + j = seq.length[index] - 1; + +# ifdef DEBUG + cout << "\n" << *pstate << " " << optimal_occupancy[j][*pstate][brank] << " " << brank << " | "; +# endif + + do { + for (k = 0;k < optimal_occupancy[j][*pstate][brank];k++) { + active_cell[j - k][*pstate] = true; + } + + for (k = 0;k < optimal_occupancy[j][*pstate][brank] - 1;k++) { + pstate--; + *pstate = *(pstate + 1); + } + + if (j >= optimal_occupancy[j][*pstate][brank]) { + pstate--; + *pstate = optimal_state[j][*(pstate + 1)][brank]; + previous_rank = optimal_rank[j][*(pstate + 1)][brank]; + j -= optimal_occupancy[j][*(pstate + 1)][brank]; + brank = previous_rank; + +# ifdef DEBUG + cout << *pstate << " " << optimal_occupancy[j][*pstate][brank] << " " << brank << " | "; +# endif + + } + else { + j -= 
optimal_occupancy[j][*pstate][brank]; + } + } + while (j >= 0); + +# ifdef DEBUG + cout << endl; +# endif + + likelihood_cumul += exp(forward_max); + +# ifdef DEBUG + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { +/* state_sequence_probability[j][*pstate++] += exp(forward_max - seq_likelihood); */ + + if (forward_max > state_sequence_probability[j][*pstate]) { + state_sequence_probability[j][*pstate] = forward_max; + } + pstate++; + } +# endif + + nb_cell = 0; + for (j = 0;j < seq.length[index];j++) { + for (k = 0;k < nb_state;k++) { + if (active_cell[j][k]) { + nb_cell++; + } + } + } + +# ifdef MESSAGE + if (i == 0) { + os << "\n"; + } + + pstate = seq.int_sequence[index][0]; + + switch (format) { + + case ASCII : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << " "; + } + +// os << " " << i + 1 << " " << forward_max << " (" << exp(forward_max - state_seq_likelihood) + os << " " << i + 1 << " " << forward_max << " (" << exp(forward_max - seq_likelihood) + << " " << likelihood_cumul / exp(seq_likelihood) << " " << nb_cell << ")" << endl; + + if (nb_component == nb_state) { + os << SEQ_label[SEQL_STATE_BEGIN] << ": "; + + pstate = seq.int_sequence[index][0] + 1; + if (seq.index_parameter) { + for (j = 1;j < seq.length[index];j++) { + if (*pstate != *(pstate - 1)) { + os << seq.index_parameter[index][j] << ", "; + } + pstate++; + } + } + + else { + for (j = 1;j < seq.length[index];j++) { + if (*pstate != *(pstate - 1)) { + os << j << ", "; + } + pstate++; + } + } + + os << endl; + } + break; + } + + case SPREADSHEET : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << "\t"; + } + +// os << "\t" << i + 1 << "\t" << forward_max << "\t" << exp(forward_max - state_seq_likelihood) + os << "\t" << i + 1 << "\t" << forward_max << "\t" << exp(forward_max - seq_likelihood) + << "\t" << likelihood_cumul / exp(seq_likelihood) << "\t" << nb_cell << endl; + break; + } + } +# endif + +# ifdef DEBUG + entropy -= 
exp(forward_max - seq_likelihood) * forward_max; +# endif + + } + +# ifdef DEBUG + os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << entropy + seq_likelihood << endl; + + if (likelihood_cumul / exp(seq_likelihood) > 0.8) { + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (state_sequence_probability[i][j] != D_INF) { + state_sequence_probability[i][j] = exp(state_sequence_probability[i][j] - seq_likelihood); + } + else { + state_sequence_probability[i][j] = 0.; + } + } + } + + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { + *pstate++ = I_DEFAULT; + } + +// os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + seq.profile_ascii_print(os , index , nb_state , state_sequence_probability , + STAT_label[STATL_STATE]); + } +# endif + + for (i = 0;i < seq.length[index];i++) { + delete [] observation[i]; + } + delete [] observation; + + delete [] obs_product; + + for (i = 0;i < nb_state;i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < seq.length[index] - 1;i++) { + for (j = 0;j < nb_state;j++) { + delete [] state_in[i][j]; + } + delete [] state_in[i]; + } + delete [] state_in; + + delete [] rank; + + for (i = 0;i < seq.length[index] - 1;i++) { + for (j = 0;j < nb_state;j++) { + delete [] input_state[i][j]; + } + delete [] input_state[i]; + } + delete [] input_state; + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + delete [] optimal_state[i][j]; + } + delete [] optimal_state[i]; + } + delete [] optimal_state; + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + delete [] optimal_occupancy[i][j]; + } + delete [] optimal_occupancy[i]; + } + delete [] optimal_occupancy; + + for (i = 0;i < seq.length[index] - 1;i++) { + for (j = 0;j < nb_state;j++) { + delete [] input_rank[i][j]; + } + delete [] input_rank[i]; + } + delete [] 
input_rank; + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + delete [] optimal_rank[i][j]; + } + delete [] optimal_rank[i]; + } + delete [] optimal_rank; + + for (i = 0;i < seq.length[index];i++) { + delete [] active_cell[i]; + } + delete [] active_cell; + + delete [] pioutput; + delete [] proutput; + +# ifdef DEBUG + for (i = 0;i < seq.length[index];i++) { + delete [] state_sequence_probability[i]; + } + delete [] state_sequence_probability; +# endif + + return state_seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state profiles using the Viterbi forward-backward algorithm. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] plot pointer on a MultiPlot object, + * \param[in] output output type, + * \param[in] format output format (ASCII/SPREADSHEET/GNUPLOT/PLOT), + * \param[in] seq_likelihood log-likelihood for the observed sequence. + * + * \return log-likelihood for the most probable state sequence. 
+ */ +/*--------------------------------------------------------------*/ + +double HiddenSemiMarkov::viterbi_forward_backward(const MarkovianSequences &seq , int index , + ostream *os , MultiPlot *plot , + state_profile output , output_format format , + double seq_likelihood) const + +{ + /* Overview: the work arrays below are indexed [position][state]. The forward recurrence fills forward1 and state_in with maximized log-likelihoods, the backward recurrence fills backward1 and backward, the state maximizing backward at each position is written to seq.int_sequence[index][0], and backward_output is turned into exp(value - seq_likelihood) before being written in the requested format. The returned state_seq_likelihood is the maximum of forward1 at the last position; when it is D_INF the backward recurrence and all output are skipped. */ + int i , j , k , m; + int *pstate , **pioutput; + double obs_product , buff , residual , state_seq_likelihood , backward_max , **observation , + **forward1 , **state_in , **backward , **backward1 , *auxiliary , *occupancy_auxiliary , + **backward_output , **proutput; + DiscreteParametric *occupancy; + + + // initializations + + observation = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + observation[i] = new double[nb_state]; + } + + forward1 = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward1[i] = new double[nb_state]; + } + + state_in = new double*[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + state_in[i] = new double[nb_state]; + } + + backward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward[i] = new double[nb_state]; + } + + backward1 = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward1[i] = new double[nb_state]; + } + + auxiliary = new double[nb_state]; + occupancy_auxiliary = new double[seq.length[index] + 1]; + + /* for SSTATE the output profile aliases backward; otherwise it is a separately allocated array of the same dimensions */ + if (output == SSTATE) { + backward_output = backward; + } + else { + backward_output = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward_output[i] = new double[nb_state]; + } + } + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + +# ifdef MESSAGE + int *state_sequence , **input_state , **optimal_state , **optimal_forward_occupancy; + + input_state = new int*[seq.length[index] - 1]; + for (i = 0;i < seq.length[index] - 1;i++) { + input_state[i] = new int[nb_state]; + } + + optimal_state = new int*[seq.length[index]]; + for (i = 0;i 
< seq.length[index];i++) { + optimal_state[i] = new int[nb_state]; + } + + optimal_forward_occupancy = new int*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + optimal_forward_occupancy[i] = new int[nb_state]; + } + + state_sequence = new int[seq.length[index]]; +# endif + + /* per-process cursors on the observed values of sequence index; variable 0 is not an output process here (it receives the restored states below), hence the i + 1 */ + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + + // forward recurrence + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + + // computation of the observation probabilities + + observation[i][j] = 0.; + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + buff = categorical_process[k]->observation[j]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + buff = discrete_parametric_process[k]->observation[j]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else if (continuous_parametric_process[k]->ident == LINEAR_MODEL) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? 
i : seq.index_parameter[index][i])); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->intercept + + continuous_parametric_process[k]->observation[j]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? i : seq.index_parameter[index][i])); + break; + } + + buff = continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else if (continuous_parametric_process[k]->ident == AUTOREGRESSIVE_MODEL) { + if (i == 0) { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + case REAL_VALUE : + residual = *proutput[k] - continuous_parametric_process[k]->observation[j]->location; + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + residual = *pioutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(pioutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + case REAL_VALUE : + residual = *proutput[k] - (continuous_parametric_process[k]->observation[j]->location + + continuous_parametric_process[k]->observation[j]->autoregressive_coeff * + (*(proutput[k] - 1) - continuous_parametric_process[k]->observation[j]->location)); + break; + } + } + + buff = continuous_parametric_process[k]->observation[j]->mass_computation(residual , residual); + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[j]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) 
{ + buff = log(buff); + } + else { + buff = D_INF; + } + } + + /* a single D_INF term makes the whole observation term D_INF and the remaining output processes are not evaluated */ + if (buff == D_INF) { + observation[i][j] = D_INF; + break; + } + else { + observation[i][j] += buff; + } + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + obs_product = 0.; + forward1[i][j] = D_INF; + + if (i < seq.length[index] - 1) { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (observation[i - k + 1][j] == D_INF) { + break; + } + else { + obs_product += observation[i - k + 1][j]; + } + + if (k < i + 1) { + buff = obs_product + occupancy->mass[k] + state_in[i - k][j]; + } + + else { + switch (type) { + case ORDINARY : + buff = obs_product + occupancy->mass[k] + cumul_initial[j]; + break; + case EQUILIBRIUM : + buff = obs_product + forward[j]->mass[k] + cumul_initial[j]; + break; + } + } + + if (buff > forward1[i][j]) { + forward1[i][j] = buff; + +# ifdef MESSAGE + if (k < i + 1) { + optimal_state[i][j] = input_state[i - k][j]; + } + optimal_forward_occupancy[i][j] = k; +# endif + + } + } + } + + else { + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (observation[i - k + 1][j] == D_INF) { + break; + } + else { + obs_product += observation[i - k + 1][j]; + } + + if (k < i + 1) { + buff = obs_product + occupancy->cumul[k - 1] + state_in[i - k][j]; + } + + else { + switch (type) { + case ORDINARY : + buff = obs_product + occupancy->cumul[k - 1] + cumul_initial[j]; + break; + case EQUILIBRIUM : + buff = obs_product + forward[j]->cumul[k - 1] + cumul_initial[j]; + break; + } + } + + if (buff > forward1[i][j]) { + forward1[i][j] = buff; + +# ifdef MESSAGE + if (k < i + 1) { + optimal_state[i][j] = input_state[i - k][j]; + } + optimal_forward_occupancy[i][j] = k; +# endif + + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (i == 0) { + forward1[i][j] = cumul_initial[j]; + } + else { + forward1[i][j] = state_in[i - 1][j]; + +# ifdef MESSAGE + 
optimal_state[i][j] = input_state[i - 1][j]; +# endif + + } + +# ifdef MESSAGE + optimal_forward_occupancy[i][j] = 1; +# endif + + if (forward1[i][j] != D_INF) { + if (observation[i][j] == D_INF) { + forward1[i][j] = D_INF; + } + else { + forward1[i][j] += observation[i][j]; + } + } + break; + } + } + } + + if (i < seq.length[index] - 1) { + for (j = 0;j < nb_state;j++) { + state_in[i][j] = D_INF; + for (k = 0;k < nb_state;k++) { + buff = cumul_transition[k][j] + forward1[i][k]; + if (buff > state_in[i][j]) { + state_in[i][j] = buff; + +# ifdef MESSAGE + input_state[i][j] = k; +# endif + + } + } + } + } + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + } + + // extraction of the log-likelihood for the most probable state sequence + +# ifdef MESSAGE + pstate = state_sequence + seq.length[index] - 1; +# endif + + state_seq_likelihood = D_INF; + i = seq.length[index] - 1; + for (j = 0;j < nb_state;j++) { + if (forward1[i][j] > state_seq_likelihood) { + state_seq_likelihood = forward1[i][j]; + +# ifdef MESSAGE + *pstate = j; +# endif + + } + } + + if (state_seq_likelihood != D_INF) { + /* everything up to the matching closing brace (backward recurrence, state restoration, normalization, output) runs only when some state has a finite forward1 value at the last position */ + +# ifdef MESSAGE + i = seq.length[index] - 1; + + do { + for (j = 0;j < optimal_forward_occupancy[i][*pstate] - 1;j++) { + pstate--; + *pstate = *(pstate + 1); + } + + if (i >= optimal_forward_occupancy[i][*pstate]) { + pstate--; + *pstate = optimal_state[i][*(pstate + 1)]; + i -= optimal_forward_occupancy[i][*(pstate + 1)]; + } + else { + i -= optimal_forward_occupancy[i][*pstate]; + } + } + while (i >= 0); +# endif + + // backward recurrence + + i = seq.length[index] - 1; + for (j = 0;j < nb_state;j++) { + backward1[i][j] = 0.; + backward[i][j] = forward1[i][j]; + + if (output == OUT_STATE) { + backward_output[i][j] = backward[i][j]; + } + } + + for (i = seq.length[index] - 2;i >= 0;i--) { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case 
semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + obs_product = 0.; + + for (k = 1;k < MIN(seq.length[index] - i , occupancy->nb_value);k++) { + if (observation[i + k][j] == D_INF) { + break; + } + else { + obs_product += observation[i + k][j]; + } + + if (k < seq.length[index] - i - 1) { + occupancy_auxiliary[k] = backward1[i + k][j] + obs_product + occupancy->mass[k]; + } + else { + occupancy_auxiliary[k] = obs_product + occupancy->cumul[k - 1]; + } + } + + auxiliary[j] = D_INF; + for (m = k - 1;m >= 1;m--) { + if (occupancy_auxiliary[m] > auxiliary[j]) { + auxiliary[j] = occupancy_auxiliary[m]; + } + + // transformation of semi-Markovian log-likelihoods in Markovian log-likelihoods + + if ((auxiliary[j] != D_INF) && (state_in[i][j] != D_INF)) { + buff = auxiliary[j] + state_in[i][j]; + if (buff > backward[i + m][j]) { + backward[i + m][j] = buff; + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if ((backward1[i + 1][j] != D_INF) && (observation[i + 1][j] != D_INF)) { + auxiliary[j] = backward1[i + 1][j] + observation[i + 1][j]; + } + else { + auxiliary[j] = D_INF; + } + break; + } + } + } + + for (j = 0;j < nb_state;j++) { + backward1[i][j] = D_INF; + for (k = 0;k < nb_state;k++) { + buff = auxiliary[k] + cumul_transition[j][k]; + if (buff > backward1[i][j]) { + backward1[i][j] = buff; + } + } + + if ((backward1[i][j] != D_INF) && (forward1[i][j] != D_INF)) { + backward[i][j] = backward1[i][j] + forward1[i][j]; + } + else { + backward[i][j] = D_INF; + } + } + + switch (output) { + + case IN_STATE : { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if ((auxiliary[j] != D_INF) && (state_in[i][j] != D_INF)) { + backward_output[i + 1][j] = auxiliary[j] + state_in[i][j]; + } + else { + backward_output[i + 1][j] = D_INF; + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward_output[i + 1][j] = 
D_INF; + + if (auxiliary[j] != D_INF) { + for (k = 0;k < nb_state;k++) { + if (k != j) { + buff = cumul_transition[k][j] + forward1[i][k]; + if (buff > backward_output[i + 1][j]) { + backward_output[i + 1][j] = buff; + } + } + } + + if (backward_output[i + 1][j] != D_INF) { + backward_output[i + 1][j] += auxiliary[j]; + } + } + break; + } + } + } + break; + } + + case OUT_STATE : { + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + backward_output[i][j] = backward[i][j]; + break; + } + + // case Markovian state + + case MARKOVIAN : { + backward_output[i][j] = D_INF; + + if (forward1[i][j] != D_INF) { + for (k = 0;k < nb_state;k++) { + if (k != j) { + buff = auxiliary[k] + cumul_transition[j][k]; + if (buff > backward_output[i][j]) { + backward_output[i][j] = buff; + } + } + } + + if (backward_output[i][j] != D_INF) { + backward_output[i][j] += forward1[i][j]; + } + } + break; + } + } + } + break; + } + } + } + + // particular case of staying in the initial state + + for (i = 0;i < nb_state;i++) { + if ((sojourn_type[i] == SEMI_MARKOVIAN) && (cumul_initial[i] != D_INF)) { + occupancy = state_process->sojourn_time[i]; + obs_product = 0.; + + for (j = 1;j < MIN(seq.length[index] + 1 , occupancy->nb_value);j++) { + if (observation[j - 1][i] == D_INF) { + break; + } + else { + obs_product += observation[j - 1][i]; + } + + if (j < seq.length[index]) { + switch (type) { + case ORDINARY : + occupancy_auxiliary[j] = backward1[j - 1][i] + obs_product + occupancy->mass[j]; + break; + case EQUILIBRIUM : + occupancy_auxiliary[j] = backward1[j - 1][i] + obs_product + forward[i]->mass[j]; + break; + } + } + + else { + switch (type) { + case ORDINARY : + occupancy_auxiliary[j] = obs_product + occupancy->cumul[j - 1]; + break; + case EQUILIBRIUM : + occupancy_auxiliary[j] = obs_product + forward[i]->cumul[j - 1]; + break; + } + } + } + + auxiliary[i] = D_INF; + for (k = j - 1;k >= 1;k--) { + if 
(occupancy_auxiliary[k] > auxiliary[i]) { + auxiliary[i] = occupancy_auxiliary[k]; + } + + // transformation of semi-Markovian log-likelihoods in Markovian log-likelihoods + + if (auxiliary[i] != D_INF) { + buff = auxiliary[i] + cumul_initial[i]; + if (buff > backward[k - 1][i]) { + backward[k - 1][i] = buff; + } + } + } + } + + if (output == IN_STATE) { + backward_output[0][i] = backward[0][i]; + } + } + + // restoration of the most probable state sequence + + pstate = seq.int_sequence[index][0]; + + for (i = 0;i < seq.length[index];i++) { + backward_max = D_INF; + for (j = 0;j < nb_state;j++) { + if (backward[i][j] > backward_max) { + backward_max = backward[i][j]; + *pstate = j; + } + } + +# ifdef MESSAGE + if (*pstate != state_sequence[i]) { + cout << "\nERROR: " << i << " | " << *pstate << " " << state_sequence[i] << endl; + } +# endif + + pstate++; + } + + // normalization + /* D_INF entries become 0, every other entry becomes exp(entry - seq_likelihood); when output == SSTATE this overwrites backward itself since backward_output aliases it */ + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (backward_output[i][j] != D_INF) { + backward_output[i][j] = exp(backward_output[i][j] - seq_likelihood); +// backward_output[i][j] = exp(backward_output[i][j] - state_seq_likelihood); + } + else { + backward_output[i][j] = 0.; + } + } + } + + switch (format) { + + case ASCII : { + switch (output) { + case SSTATE : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + break; + case IN_STATE : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_IN_STATE_PROBABILITY] << "\n\n"; + break; + case OUT_STATE : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_OUT_STATE_PROBABILITY] << "\n\n"; + break; + } + + seq.profile_ascii_print(*os , index , nb_state , backward_output , + STAT_label[STATL_STATE]); + + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << ": " << state_seq_likelihood + << " (" << exp(state_seq_likelihood - seq_likelihood) << ")" << endl; + break; + } + + case SPREADSHEET : { + switch (output) { + case SSTATE : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << 
"\n\n"; + break; + case IN_STATE : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_IN_STATE_PROBABILITY] << "\n\n"; + break; + case OUT_STATE : + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_OUT_STATE_PROBABILITY] << "\n\n"; + break; + } + + seq.profile_spreadsheet_print(*os , index , nb_state , backward_output , + STAT_label[STATL_STATE]); + + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << "\t" << state_seq_likelihood + << "\t" << exp(state_seq_likelihood - seq_likelihood) << endl; + break; + } + + case GNUPLOT : { + seq.profile_plot_print(*os , index , nb_state , backward_output); + break; + } + + case PLOT : { + seq.profile_plotable_write(*plot , index , nb_state , backward_output); + break; + } + } + +# ifdef DEBUG + if (format != GNUPLOT) { + double ambiguity = 0.; + + pstate = seq.int_sequence[index][0]; +// if (output == SSTATE) { + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (j != *pstate) { + ambiguity += backward_output[i][j]; + } + } + pstate++; + } + ambiguity *= exp(seq_likelihood - state_seq_likelihood); +/* } + + else { + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if ((backward[i][j] != D_INF) && (j != *pstate)) { + ambiguity += exp(backward[i][j] - state_seq_likelihood); + } + } + pstate++; + } + } */ + + switch (format) { + case ASCII : + *os << "\n" << SEQ_label[SEQL_AMBIGUITY] << ": " << ambiguity + << " (" << ambiguity / seq.length[index] << ")" << endl; + break; + case SPREADSHEET : + *os << "\n" << SEQ_label[SEQL_AMBIGUITY] << "\t" << ambiguity + << "\t" << ambiguity / seq.length[index] << "\t" << endl; + break; + } + } +# endif + + } + + for (i = 0;i < seq.length[index];i++) { + delete [] observation[i]; + } + delete [] observation; + + for (i = 0;i < seq.length[index];i++) { + delete [] forward1[i]; + } + delete [] forward1; + + for (i = 0;i < seq.length[index] - 1;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + for (i = 0;i < seq.length[index];i++) { + 
delete [] backward[i]; + } + delete [] backward; + + for (i = 0;i < seq.length[index];i++) { + delete [] backward1[i]; + } + delete [] backward1; + + delete [] auxiliary; + delete [] occupancy_auxiliary; + + /* backward_output owns memory only when output != SSTATE; in the SSTATE case it aliases backward, already freed above */ + if (output != SSTATE) { + for (i = 0;i < seq.length[index];i++) { + delete [] backward_output[i]; + } + delete [] backward_output; + } + + delete [] pioutput; + delete [] proutput; + +# ifdef MESSAGE + for (i = 0;i < seq.length[index] - 1;i++) { + delete [] input_state[i]; + } + delete [] input_state; + + for (i = 0;i < seq.length[index];i++) { + delete [] optimal_state[i]; + } + delete [] optimal_state; + + for (i = 0;i < seq.length[index];i++) { + delete [] optimal_forward_occupancy[i]; + } + delete [] optimal_forward_occupancy; + + delete [] state_sequence; +# endif + + return state_seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * writing of the results. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] format format (ASCII/SPREADSHEET), + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_write(StatError &error , ostream &os , + const MarkovianSequences &iseq , int identifier , + state_profile output , output_format format , + latent_structure_algorithm state_sequence , + int nb_state_sequence) const + +{ + bool status = true; + int i; + int offset = I_DEFAULT , nb_value , index = I_DEFAULT; + double seq_likelihood , max_marginal_entropy , entropy; + HiddenSemiMarkov *hsmarkov1 , *hsmarkov2; + SemiMarkovData *seq; + + + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE) && (iseq.type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process == iseq.nb_variable) { + offset = 0; + } + else if ((iseq.type[0] == STATE) && (nb_output_process + 1 == iseq.nb_variable)) { + offset = 1; + } + else { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + if (offset != I_DEFAULT) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i + offset] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i + offset] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << 
STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!(iseq.marginal_distribution[i + offset])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i + offset]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + if (identifier != I_DEFAULT) { + for (i = 0;i < iseq.nb_sequence;i++) { + if (identifier == iseq.identifier[i]) { + index = i; + break; + } + } + + if (i == iseq.nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + } + + if (nb_state_sequence < 2) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE_SEQUENCE]); + } + + if (status) { + if (nb_output_process == iseq.nb_variable) { + seq = new SemiMarkovData(iseq); + } + else { + seq = new SemiMarkovData(iseq , SEQUENCE_COPY , (type == EQUILIBRIUM ? 
true : false)); + } + + hsmarkov1 = new HiddenSemiMarkov(*this , false); + + hsmarkov2 = new HiddenSemiMarkov(*this , false); + hsmarkov2->create_cumul(); + hsmarkov2->log_computation(); + + for (i = 0;i < seq->nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + seq_likelihood = hsmarkov1->forward_backward(*seq , i , &os , NULL , output , format , + max_marginal_entropy , entropy); + + if (seq_likelihood == D_INF) { + status = false; + + if (index == I_DEFAULT) { + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << " " + << SEQ_error[SEQR_INCOMPATIBLE_MODEL]; + error.update((error_message.str()).c_str()); + } + else { + error.update(SEQ_error[SEQR_SEQUENCE_INCOMPATIBLE_MODEL]); + } + } + + else { + hsmarkov2->viterbi_forward_backward(*seq , i , &os , NULL , output , format , + seq_likelihood); + + switch (state_sequence) { + case GENERALIZED_VITERBI : + hsmarkov2->generalized_viterbi(*seq , i , os , seq_likelihood , format , + nb_state_sequence); + break; + case FORWARD_BACKWARD_SAMPLING : + hsmarkov1->forward_backward_sampling(*seq , i , os , format , + nb_state_sequence); + break; + } + } + } + } + + delete seq; + + delete hsmarkov1; + delete hsmarkov2; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * displaying the results. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the state and entropy profiles and the N most probable state sequences, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_ascii_write(StatError &error , ostream &os , const MarkovianSequences &iseq , + int identifier , state_profile output , + latent_structure_algorithm state_sequence , + int nb_state_sequence) const + +{ + return state_profile_write(error , os , iseq , identifier , + output , ASCII , state_sequence , nb_state_sequence); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * writing of the results in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_write(StatError &error , const string path , + const MarkovianSequences &iseq , int identifier , + state_profile output , output_format format , + latent_structure_algorithm state_sequence , + int nb_state_sequence) const + +{ + bool status = true; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + else { + status = state_profile_write(error , out_file , iseq , identifier , + output , format , state_sequence , nb_state_sequence); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * displaying of the results. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the state and entropy profiles and the N most probable state sequences, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_ascii_write(StatError &error , ostream &os , int identifier , + state_profile output , latent_structure_algorithm state_sequence , + int nb_state_sequence) const + +{ + bool status; + + + error.init(); + + if (!semi_markov_data) { + status = false; + error.update(STAT_error[STATR_NO_DATA]); + } + else { + status = state_profile_write(error , os , *semi_markov_data , identifier , + output , ASCII , state_sequence , nb_state_sequence); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * writing of the results in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_write(StatError &error , const string path , int identifier , + state_profile output , output_format format , + latent_structure_algorithm state_sequence , + int nb_state_sequence) const + +{ + bool status = true; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + if (!semi_markov_data) { + status = false; + error.update(STAT_error[STATR_NO_DATA]); + } + + if (status) { + status = state_profile_write(error , out_file , *semi_markov_data , identifier , + output , format , state_sequence , nb_state_sequence); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the results at the Gnuplot format. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] title figure title. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_plot_write(StatError &error , const char *prefix , + const MarkovianSequences &iseq , int identifier , + state_profile output , const char *title) const + +{ + bool status = true; + int i , j; + int offset = I_DEFAULT , nb_value , index; + double seq_likelihood , max_marginal_entropy , entropy , state_seq_likelihood; + HiddenSemiMarkov *hsmarkov; + SemiMarkovData *seq; + ostringstream data_file_name[2]; + ofstream *out_data_file; + + + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE) && (iseq.type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process == iseq.nb_variable) { + offset = 0; + } + else if ((iseq.type[0] == STATE) && (nb_output_process + 1 == iseq.nb_variable)) { + offset = 1; + } + else { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + if (offset != I_DEFAULT) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i + offset] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i + offset] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << 
STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!(iseq.marginal_distribution[i + offset])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i + offset]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + for (i = 0;i < iseq.nb_sequence;i++) { + if (identifier == iseq.identifier[i]) { + index = i; + break; + } + } + + if (i == iseq.nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + + if (status) { + + // writing of the date files + + data_file_name[0] << prefix << 0 << ".dat"; + out_data_file = new ofstream((data_file_name[0].str()).c_str()); + + if (!out_data_file) { + status = false; + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + else { + if (iseq.type[0] != STATE) { + seq = new SemiMarkovData(iseq); + } + else { + seq = new SemiMarkovData(iseq , SEQUENCE_COPY , (type == EQUILIBRIUM ? 
true : false)); + } + + hsmarkov = new HiddenSemiMarkov(*this , false); + + seq_likelihood = hsmarkov->forward_backward(*seq , index , out_data_file , NULL , output , + GNUPLOT , max_marginal_entropy , entropy); + out_data_file->close(); + delete out_data_file; + + if (seq_likelihood == D_INF) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_INCOMPATIBLE_MODEL]); + } + + else { + data_file_name[1] << prefix << 1 << ".dat"; + out_data_file = new ofstream((data_file_name[1].str()).c_str()); + + hsmarkov->create_cumul(); + hsmarkov->log_computation(); + state_seq_likelihood = hsmarkov->viterbi_forward_backward(*seq , index , out_data_file , NULL , + output , GNUPLOT , seq_likelihood); + out_data_file->close(); + delete out_data_file; + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title \""; + if (title) { + out_file << title << " - "; + } + switch (output) { + case SSTATE : + out_file << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\"\n\n"; + break; + case IN_STATE : + out_file << SEQ_label[SEQL_MAX_POSTERIOR_IN_STATE_PROBABILITY] << "\"\n\n"; + break; + case OUT_STATE : + out_file << SEQ_label[SEQL_MAX_POSTERIOR_OUT_STATE_PROBABILITY] << "\"\n\n"; + break; + } + + if (seq->index_parameter) { + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << 
seq->index_parameter[index][seq->length[index] - 1] << "] [0:" + << exp(state_seq_likelihood - seq_likelihood) << "] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << j + 2 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + switch (output) { + case SSTATE : + out_file << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\"\n\n"; + break; + case IN_STATE : + out_file << SEQ_label[SEQL_POSTERIOR_IN_STATE_PROBABILITY] << "\"\n\n"; + break; + case OUT_STATE : + out_file << SEQ_label[SEQL_POSTERIOR_OUT_STATE_PROBABILITY] << "\"\n\n"; + break; + } + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:1] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << j + 2 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:" + << max_marginal_entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 2 << " title \"" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << 
label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 3 << " title \"" << SEQ_label[SEQL_MARGINAL_ENTROPY] + << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:" << entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 4 << " title \"" + << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] << "\" with linespoints" << endl; + + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + else { + if (seq->length[index] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:" + << exp(state_seq_likelihood - seq_likelihood) << "] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " +// << j + 1 << " title \"" << STAT_label[STATL_STATE] << " " + << 1 << " : " << j + 2 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + switch (output) { + case SSTATE : + out_file << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\"\n\n"; + break; + case IN_STATE : + out_file << SEQ_label[SEQL_POSTERIOR_IN_STATE_PROBABILITY] << "\"\n\n"; + break; + case OUT_STATE : + out_file << 
SEQ_label[SEQL_POSTERIOR_OUT_STATE_PROBABILITY] << "\"\n\n"; + break; + } + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:1] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << j + 1 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:" << max_marginal_entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 1 << " title \"" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 2 << " title \"" << SEQ_label[SEQL_MARGINAL_ENTROPY] + << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:" << entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 3 << " title \"" << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] + << "\" with linespoints" << endl; + + if (seq->length[index] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + delete seq; + delete hsmarkov; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** 
+ * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the results at the Gnuplot format. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] identifier sequence identifier, + * \param[in] output output type, + * \param[in] title figure title. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool HiddenSemiMarkov::state_profile_plot_write(StatError &error , const char *prefix , int identifier , + state_profile output , const char *title) const + +{ + bool status; + + + error.init(); + + if (!semi_markov_data) { + status = false; + error.update(STAT_error[STATR_NO_DATA]); + } + else { + status = state_profile_plot_write(error , prefix , *semi_markov_data , identifier , + output , title); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the profiles. + * + * \param[in] error reference on a StatError object, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] output output type. + * + * \return MultiPlotSet object. 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* HiddenSemiMarkov::state_profile_plotable_write(StatError &error , + const MarkovianSequences &iseq , + int identifier , state_profile output) const + +{ + bool status = true; + int i; + int offset = I_DEFAULT , nb_value , index; + double seq_likelihood , max_marginal_entropy , entropy , state_seq_likelihood; + HiddenSemiMarkov *hsmarkov; + SemiMarkovData *seq; + ostringstream legend; + MultiPlotSet *plot_set; + + + plot_set = NULL; + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE) && (iseq.type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process == iseq.nb_variable) { + offset = 0; + } + else if ((iseq.type[0] == STATE) && (nb_output_process + 1 == iseq.nb_variable)) { + offset = 1; + } + else { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + if (offset != I_DEFAULT) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i + offset] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i + offset] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + 
error.update((error_message.str()).c_str()); + } + + if (!(iseq.marginal_distribution[i + offset])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i + offset]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + for (i = 0;i < iseq.nb_sequence;i++) { + if (identifier == iseq.identifier[i]) { + index = i; + break; + } + } + + if (i == iseq.nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + + if (status) { + plot_set = new MultiPlotSet(4); + + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + if (iseq.type[0] != STATE) { + seq = new SemiMarkovData(iseq); + } + else { + seq = new SemiMarkovData(iseq , SEQUENCE_COPY , (type == EQUILIBRIUM ? 
true : false)); + } + + hsmarkov = new HiddenSemiMarkov(*this , false); + + seq_likelihood = hsmarkov->forward_backward(*seq , index , NULL , plot_set , output , + PLOT , max_marginal_entropy , entropy); + + if (seq_likelihood == D_INF) { + delete plot_set; + plot_set = NULL; + error.update(SEQ_error[SEQR_SEQUENCE_INCOMPATIBLE_MODEL]); + } + + else { + hsmarkov->create_cumul(); + hsmarkov->log_computation(); + state_seq_likelihood = hsmarkov->viterbi_forward_backward(*seq , index , NULL , &plot[0] , + output , PLOT , seq_likelihood); + + // maximum posterior probabilities + + switch (output) { + case SSTATE : + plot[0].title = SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY]; + break; + case IN_STATE : + plot[0].title = SEQ_label[SEQL_MAX_POSTERIOR_IN_STATE_PROBABILITY]; + break; + case OUT_STATE : + plot[0].title = SEQ_label[SEQL_MAX_POSTERIOR_OUT_STATE_PROBABILITY]; + break; + } + + if (seq->index_parameter) { + plot[0].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[0].xtics = 1; + } + } + + else { + plot[0].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[0].xtics = 1; + } + } + + plot[0].yrange = Range(0. 
, exp(state_seq_likelihood - seq_likelihood)); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i; + plot[0][i].legend = legend.str(); + + plot[0][i].style = "linespoints"; + } + + // smoothed probabilities + + switch (output) { + case SSTATE : + plot[1].title = SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY]; + break; + case IN_STATE : + plot[1].title = SEQ_label[SEQL_POSTERIOR_IN_STATE_PROBABILITY]; + break; + case OUT_STATE : + plot[1].title = SEQ_label[SEQL_POSTERIOR_OUT_STATE_PROBABILITY]; + break; + } + + if (seq->index_parameter) { + plot[1].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[1].xtics = 1; + } + } + + else { + plot[1].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[1].xtics = 1; + } + } + + plot[1].yrange = Range(0. , 1.); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i; + plot[1][i].legend = legend.str(); + + plot[1][i].style = "linespoints"; + } + + // conditional entropy profiles + + if (seq->index_parameter) { + plot[2].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[2].xtics = 1; + } + } + + else { + plot[2].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[2].xtics = 1; + } + } + + plot[2].yrange = Range(0. 
, max_marginal_entropy); + + plot[2][0].legend = SEQ_label[SEQL_CONDITIONAL_ENTROPY]; + plot[2][0].style = "linespoints"; + + plot[2][1].legend = SEQ_label[SEQL_MARGINAL_ENTROPY]; + plot[2][1].style = "linespoints"; + + // partial entropy profiles + + if (seq->index_parameter) { + plot[3].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[3].xtics = 1; + } + } + + else { + plot[3].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[3].xtics = 1; + } + } + + plot[3].yrange = Range(0. ,entropy); + + plot[3][0].legend = SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY]; + plot[3][0].style = "linespoints"; + } + + delete seq; + delete hsmarkov; + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the results. + * + * \param[in] error reference on a StatError object, + * \param[in] identifier sequence identifier, + * \param[in] output output type. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* HiddenSemiMarkov::state_profile_plotable_write(StatError &error , int identifier , + state_profile output) const + +{ + MultiPlotSet *plot_set; + + + error.init(); + + if (!semi_markov_data) { + plot_set = NULL; + error.update(STAT_error[STATR_NO_DATA]); + } + else { + plot_set = state_profile_plotable_write(error , *semi_markov_data , identifier , output); + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the most probable state sequences. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the posterior state sequence probabilities, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] characteristic_flag flag on the computation of the characteristic distributions. + * + * \return SemiMarkovData object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* HiddenSemiMarkov::state_sequence_computation(StatError &error , ostream *os , + const MarkovianSequences &iseq , + bool characteristic_flag) const + +{ + bool status = true; + int i; + int nb_value; + HiddenSemiMarkov *hsmarkov; + SemiMarkovData *seq; + + + seq = NULL; + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process != iseq.nb_variable) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + else { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } 
+ + if (!(iseq.marginal_distribution[i])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + if (status) { + seq = new SemiMarkovData(iseq , ADD_STATE_VARIABLE , (type == EQUILIBRIUM ? true : false)); + + seq->semi_markov = new SemiMarkov(*this , false); + + hsmarkov = new HiddenSemiMarkov(*this , false); + + hsmarkov->forward_backward(*seq); + + hsmarkov->create_cumul(); + hsmarkov->log_computation(); + hsmarkov->viterbi(*seq); + + delete hsmarkov; + + // extraction of the characteristics of the sequences and + // computation of the characteristic distributions of the model + + if (seq->restoration_likelihood == D_INF) { + delete seq; + seq = NULL; + error.update(SEQ_error[SEQR_STATE_SEQUENCE_COMPUTATION_FAILURE]); + } + + else { + seq->likelihood = likelihood_computation(iseq , seq->posterior_probability); + + if ((os) && (seq->nb_sequence <= POSTERIOR_PROBABILITY_NB_SEQUENCE)) { + int j; + int *pstate; + + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] << endl; + for (i = 0;i < seq->nb_sequence;i++) { + *os << SEQ_label[SEQL_SEQUENCE] << " " << seq->identifier[i] << ": " + << seq->posterior_probability[i]; + + if (hsmarkov->nb_component == hsmarkov->nb_state) { + *os << " | " << SEQ_label[SEQL_STATE_BEGIN] << ": "; + + pstate = seq->int_sequence[i][0] + 1; + if (seq->index_parameter) { + for (j = 1;j < 
seq->length[i];j++) { + if (*pstate != *(pstate - 1)) { + *os << seq->index_parameter[i][j] << ", "; + } + pstate++; + } + } + + else { + for (j = 1;j < seq->length[i];j++) { + if (*pstate != *(pstate - 1)) { + *os << j << ", "; + } + pstate++; + } + } + } + + *os << endl; + } + } + +/* seq->min_value_computation(0); + seq->max_value_computation(0); */ + + seq->min_value[0] = 0; + seq->max_value[0] = nb_state - 1; + seq->build_marginal_frequency_distribution(0); + seq->build_characteristic(0 , true , (type == EQUILIBRIUM ? true : false)); + + seq->build_transition_count(this); + seq->build_observation_frequency_distribution(nb_state); + +/* if ((seq->max_value[0] < nb_state - 1) || (!(seq->characteristics[0]))) { + delete seq; + seq = NULL; + error.update(SEQ_error[SEQR_STATES_NOT_REPRESENTED]); + } + + else if (characteristic_flag) { */ + if (characteristic_flag) { + seq->semi_markov->characteristic_computation(*seq , true); + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Comparison of hidden semi-Markov chains for a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the results of model comparison, + * \param[in] nb_model number of hidden semi-Markov chains, + * \param[in] ihsmarkov pointer on HiddenSemiMarkov objects, + * \param[in] algorithm type of algorithm (FORWARD/VITERBI), + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::comparison(StatError &error , ostream *os , int nb_model , + const HiddenSemiMarkov **ihsmarkov , + latent_structure_algorithm algorithm , + const string path) const + +{ + bool status = true; + int i , j; + int nb_value; + double **likelihood; + HiddenSemiMarkov **hsmarkov; + SemiMarkovData *seq; + + + error.init(); + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + for (i = 0;i < nb_model;i++) { + if (ihsmarkov[i]->nb_output_process != nb_variable) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT_PROCESS]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < nb_variable;j++) { + if ((ihsmarkov[i]->categorical_process[j]) || (ihsmarkov[i]->discrete_parametric_process[j])) { + if (type[j] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (min_value[j] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[j]) { + status = false; + ostringstream 
error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (ihsmarkov[i]->categorical_process[j]) { + nb_value = ihsmarkov[i]->categorical_process[j]->nb_value; + } + else { + nb_value = ihsmarkov[i]->discrete_parametric_process[j]->nb_value; + } + + if (nb_value < marginal_distribution[j]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_label[STATL_OUTPUT_PROCESS] << " " << j + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + } + + if (status) { + likelihood = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + likelihood[i] = new double[nb_model]; + } + + hsmarkov = new HiddenSemiMarkov*[nb_model]; + for (i = 0;i < nb_model;i++) { + hsmarkov[i] = new HiddenSemiMarkov(*(ihsmarkov[i]) , false , false); + } + + if (algorithm == VITERBI) { + for (i = 0;i < nb_model;i++) { + hsmarkov[i]->create_cumul(); + hsmarkov[i]->log_computation(); + } + + seq = new SemiMarkovData(*this); + } + + // for each sequence, computation of the log-likelihood for the observed sequence (FORWARD) or + // of the log-likelihood for the most probable state sequence (VITERBI) for each model + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_model;j++) { + switch (algorithm) { + case FORWARD : + likelihood[i][j] = hsmarkov[j]->likelihood_computation(*this , NULL , i); + break; + case VITERBI : + likelihood[i][j] = hsmarkov[j]->viterbi(*seq , NULL , i); + break; + } + } + } + + if (os) { + likelihood_write(*os , nb_model , likelihood , + SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] , true , algorithm); + } + if (!path.empty()) { + status = likelihood_write(error , path , nb_model , likelihood , + SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] , algorithm); + } + + for 
(i = 0;i < nb_sequence;i++) { + delete [] likelihood[i]; + } + delete [] likelihood; + + for (i = 0;i < nb_model;i++) { + delete hsmarkov[i]; + } + delete [] hsmarkov; + + if (algorithm == VITERBI) { + delete seq; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a hidden semi-Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] length_distribution sequence length frequency distribution, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] divergence_flag flag on the computation of the Kullback-Leibler divergence. + * + * \return SemiMarkovData object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* HiddenSemiMarkov::simulation(StatError &error , + const FrequencyDistribution &length_distribution , + bool counting_flag , bool divergence_flag) const + +{ + int i; + MarkovianSequences *observed_seq; + SemiMarkovData *seq; + + + seq = SemiMarkov::simulation(error , length_distribution , counting_flag , divergence_flag); + + if ((seq) && (!divergence_flag)) { + seq->posterior_probability = new double[seq->nb_sequence]; + for (i = 0;i < seq->nb_sequence;i++) { + seq->posterior_probability[i] = SemiMarkov::likelihood_computation(*seq , i); + } + + observed_seq = seq->remove_variable_1(); + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a hidden semi-Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of sequences, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return SemiMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* HiddenSemiMarkov::simulation(StatError &error , int nb_sequence , + int length , bool counting_flag) const + +{ + int i; + MarkovianSequences *observed_seq; + SemiMarkovData *seq; + + + seq = SemiMarkov::simulation(error , nb_sequence , length , counting_flag); + + if (seq) { + seq->posterior_probability = new double[seq->nb_sequence]; + for (i = 0;i < seq->nb_sequence;i++) { + seq->posterior_probability[i] = SemiMarkov::likelihood_computation(*seq , i); + } + + observed_seq = seq->remove_variable_1(); + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a hidden semi-Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of sequences, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return SemiMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* HiddenSemiMarkov::simulation(StatError &error , int nb_sequence , + const MarkovianSequences &iseq , bool counting_flag) const + +{ + int i; + MarkovianSequences *observed_seq; + SemiMarkovData *seq; + + + seq = SemiMarkov::simulation(error , nb_sequence , iseq , counting_flag); + + if (seq) { + seq->posterior_probability = new double[seq->nb_sequence]; + for (i = 0;i < seq->nb_sequence;i++) { + seq->posterior_probability[i] = SemiMarkov::likelihood_computation(*seq , i); + } + + observed_seq = seq->remove_variable_1(); + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation of semi-markov-switching linear models, which require a single int covariate. + * + * \param[in] error reference on a StatError object, + * \param[in] inb_sequence number of sequences (int), + * \param[in] covariate reference on a Sequences object (covariate) + * \param[in] ivariable variable to be used as covariate (iseq.index_parameter if I_DEFAULT) + * or numbered from 1 to covariate.nb_variable otherwise + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return SemiMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* HiddenSemiMarkov::semi_markov_switching_lm_simulation(StatError &error , int inb_sequence , + const Sequences &covariate, + int ivariable, bool counting_flag) const +{ + int v, k, s, rep, pos, lmv, length; + const int nb_sequence = covariate.get_nb_sequence(); + std::vector lm_var = std::vector(); // variable to be simulated + bool status = true; + bool covariate_is_index = false; + index_parameter_type iindex_param_type; + ostringstream error_message; + int *covariate_values = NULL, **seq_int_values = NULL, **iindex_parameter = NULL; + double **seq_real_values = NULL; + SemiMarkovData *seq = NULL, **seq_array = NULL; + MarkovianSequences *seqm = NULL, *observed_seq = NULL, *mcovariate = NULL; + + error.init(); + + for (v = 0; v < nb_output_process; v++) { + if (continuous_parametric_process[v] != NULL) { + if (continuous_parametric_process[v]->observation[0]->ident == LINEAR_MODEL) + lm_var.push_back(v); + for (k = 1; k < nb_state; k++) { + if ((continuous_parametric_process[v]->observation[0]->ident == LINEAR_MODEL) && + (continuous_parametric_process[v]->observation[k]->ident != LINEAR_MODEL)) { + status = false; + error_message << SEQ_error[SEQR_OUTPUT_PROCESS_TYPE] << " " << v << " " + << STAT_label[STATL_STATE] << " " << k + << ": should be " << STAT_label[STATL_LINEAR_MODEL] << endl; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (lm_var.size() == 0) { + status = false; + error_message << SEQ_error[SEQR_OUTPUT_PROCESS_TYPE] << ": should be " + << STAT_label[STATL_LINEAR_MODEL] << endl; + error.update((error_message.str()).c_str()); + } + + if (status) { + if (ivariable = I_DEFAULT) { + // check that index parameter is present in covariate + if (covariate.get_index_parameter() == NULL) { + status = false; + error_message << SEQ_error[SEQR_INDEX_PARAMETER] << ": " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + 
} else + covariate_is_index = true; + } else { + // check that covariate has type INT_VALUE + if ((ivariable < 1) || (ivariable > covariate.get_nb_variable())) { + status = false; + error_message << ivariable << ": " << STAT_error[STATR_VARIABLE_INDEX] << endl; + error.update((error_message.str()).c_str()); + } + seq_int_values = covariate.get_int_sequence(0); + if ((status) && (seq_int_values[ivariable-1] == NULL)) { + status = false; + error_message << ivariable << ": " << STAT_error[STATR_VARIABLE_TYPE] << endl; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + mcovariate = new MarkovianSequences(covariate); + // simulate ibn_sequence times nb_sequence + iindex_parameter = new int*[nb_sequence*inb_sequence]; + seq_array = new SemiMarkovData*[inb_sequence]; + for (s = 0; s < inb_sequence; s++) { + // simulate sequence using implicit index as covariate + // simulate a set of nb_sequence sequences + seq_array[s] = SemiMarkov::simulation(error , nb_sequence , *mcovariate, counting_flag); + if (seq_array[s] != NULL) { + // resimulate each sequence within seq_array[s] + for (rep = 0; rep < nb_sequence; rep++) { + // seq_int_values = seq_array[s]->get_int_sequence(rep); + length = mcovariate->get_length(rep); + seq_real_values = seq_array[s]->get_real_sequence(rep); + if (!covariate_is_index) { + covariate_values = covariate.get_int_sequence(rep)[ivariable-1]; + iindex_param_type = TIME; + } + else { + covariate_values = covariate.get_index_parameter()[rep]; + iindex_param_type = covariate.get_index_param_type(); + } + // resimulate sequence using given covariate + for (pos = 0; pos < length; pos++) { + k = seq_array[s]->int_sequence[rep][0][pos]; // state + for (v = 0; v < lm_var.size(); v++) { + lmv = lm_var[v]; + seq_real_values[lmv+1][pos]= + this->continuous_parametric_process[lmv]->observation[k]->intercept + + this->continuous_parametric_process[lmv]->observation[k]->slope * covariate_values[pos] + + 
this->continuous_parametric_process[lmv]->observation[k]->simulation(); + } + } + iindex_parameter[s * nb_sequence + rep] = covariate_values; + } + for (v = 1;v < seq_array[s]->nb_variable;v++) { + seq_array[s]->min_value_computation(v); + seq_array[s]->max_value_computation(v); + + seq_array[s]->build_marginal_frequency_distribution(v); + seq_array[s]->min_interval_computation(v); + } + seq_array[s]->build_observation_frequency_distribution(nb_state); + seq_array[s]->build_observation_histogram(nb_state); + seq_array[s]->build_characteristic(I_DEFAULT , true , (type == EQUILIBRIUM ? true : false)); + } + } + delete mcovariate; + mcovariate = NULL; + if (inb_sequence == 1) + seq = seq_array[0]; + else { + const MarkovianSequences **seq_array_merge = new const MarkovianSequences*[inb_sequence-1]; + for (s = 1; s < inb_sequence; s++) { + seq_array_merge[s-1] = seq_array[s]; + } + seqm = seq_array[0]->merge(error, inb_sequence-1, seq_array_merge); + delete [] seq_array_merge; + seq_array_merge = NULL; + seq = new SemiMarkovData(*seqm, SEQUENCE_COPY, true); + seq->semi_markov = new HiddenSemiMarkov(*this); + delete seqm; + seqm = NULL; + } + } + + if (seq != NULL) { + seq->set_index_parameter(error, iindex_parameter, iindex_param_type); + delete [] iindex_parameter; + iindex_parameter = NULL; + seq->posterior_probability = new double[seq->nb_sequence]; + for (s = 0;s < seq->nb_sequence;s++) { + seq->posterior_probability[s] = SemiMarkov::likelihood_computation(*seq , s); + } + + observed_seq = seq->remove_variable_1(); + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + + if (inb_sequence > 1) { + for (s = 0; s < inb_sequence; s++) { + delete seq_array[s]; + seq_array[s] = NULL; + } + delete [] seq_array; + seq_array = NULL; + } + delete [] seq_array; + seq_array = NULL; + } + + return seq; +} + +/*--------------------------------------------------------------*/ +/** + * \brief 
Computation of Kullback-Leibler divergences between hidden semi-Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of hidden semi-Markov chains, + * \param[in] ihsmarkov pointer on HiddenSemiMarkov objects, + * \param[in] length_distribution sequence length frequency distribution, + * \param[in] path file path. + * + * \return DistanceMatrix object. + */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* HiddenSemiMarkov::divergence_computation(StatError &error , ostream *os , + int nb_model , const HiddenSemiMarkov **ihsmarkov , + FrequencyDistribution **length_distribution , + const string path) const + +{ + bool status = true , lstatus; + int i , j , k; + int cumul_length , nb_failure; + double **likelihood; + long double divergence; + const HiddenSemiMarkov **hsmarkov; + MarkovianSequences *seq; + SemiMarkovData *simul_seq; + DistanceMatrix *dist_matrix; + ofstream *out_file; + + + dist_matrix = NULL; + error.init(); + + for (i = 0;i < nb_model - 1;i++) { + if (ihsmarkov[i]->type != type) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_MODEL_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (ihsmarkov[i]->nb_output_process != nb_output_process) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_error[STATR_NB_OUTPUT_PROCESS]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < nb_output_process;j++) { + if ((categorical_process[j]) && (ihsmarkov[i]->categorical_process[j]) && + (ihsmarkov[i]->categorical_process[j]->nb_value != categorical_process[j]->nb_value)) { + status = false; + ostringstream error_message; + error_message << 
SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_label[STATL_OUTPUT_PROCESS] << " " << j << " " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + + if (((continuous_parametric_process[j]) && (!(ihsmarkov[i]->continuous_parametric_process[j]))) || + ((!continuous_parametric_process[j]) && (ihsmarkov[i]->continuous_parametric_process[j]))) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_label[STATL_OUTPUT_PROCESS] << " " << j << " " + << SEQ_error[SEQR_OUTPUT_PROCESS_TYPE]; + error.update((error_message.str()).c_str()); + } + } + } + } + + for (i = 0;i < nb_model;i++) { + lstatus = true; + + if ((length_distribution[i]->nb_element < 1) || (length_distribution[i]->nb_element > NB_SEQUENCE)) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_NB_SEQUENCE]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->offset < 2) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->nb_value - 1 > MAX_LENGTH) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + + if (!lstatus) { + status = false; + } + + else { + cumul_length = 0; + for (j = length_distribution[i]->offset;j < length_distribution[i]->nb_value;j++) { + cumul_length += j * length_distribution[i]->frequency[j]; + } + + if 
(cumul_length > CUMUL_LENGTH) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + out_file = NULL; + + if (!path.empty()) { + out_file = new ofstream(path.c_str()); + + if (!out_file) { + error.update(STAT_error[STATR_FILE_NAME]); + if (os) { + *os << error; + } + } + } + + hsmarkov = new const HiddenSemiMarkov*[nb_model]; + + hsmarkov[0] = this; + for (i = 1;i < nb_model;i++) { + hsmarkov[i] = ihsmarkov[i - 1]; + } + + dist_matrix = new DistanceMatrix(nb_model , SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN]); + + for (i = 0;i < nb_model;i++) { + + // generation of a sample of sequences using a hidden semi-Markov chain + + simul_seq = hsmarkov[i]->simulation(error , *length_distribution[i] , false , true); + seq = simul_seq->remove_variable_1(); + + likelihood = new double*[seq->nb_sequence]; + for (j = 0;j < seq->nb_sequence;j++) { + likelihood[j] = new double[nb_model]; + } + + for (j = 0;j < seq->nb_sequence;j++) { + likelihood[j][i] = hsmarkov[i]->likelihood_computation(*seq , NULL , j); + + if ((os) && (likelihood[j][i] == D_INF)) { + *os << "\nERROR - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << endl; + } + } + + // computation of the log-likelihood of each hidden semi-Markov chain for the sample of sequences + + for (j = 0;j < nb_model;j++) { + if (j != i) { + divergence = 0.; + cumul_length = 0; + nb_failure = 0; + + for (k = 0;k < seq->nb_sequence;k++) { + likelihood[k][j] = hsmarkov[j]->likelihood_computation(*seq , NULL , k); + +// if (divergence != -D_INF) { + if (likelihood[k][j] != D_INF) { + divergence += likelihood[k][i] - likelihood[k][j]; + cumul_length += seq->length[k]; + } + else { + nb_failure++; +// divergence = -D_INF; + } +// } + } + + if ((os) && (nb_failure > 0)) { + *os << "\nWARNING - " 
<< SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << ", " + << SEQ_error[SEQR_TARGET_MODEL] << ": " << j + 1 << " - " + << SEQ_error[SEQR_DIVERGENCE_NB_FAILURE] << ": " << nb_failure << endl; + } + +// if (divergence != -D_INF) { + dist_matrix->update(i + 1 , j + 1 , divergence , cumul_length); +// } + } + } + + if (os) { + *os << SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " << seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + seq->likelihood_write(cout , nb_model , likelihood , SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN]); + } + if (out_file) { + *out_file << SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " << seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + seq->likelihood_write(*out_file , nb_model , likelihood , SEQ_label[SEQL_HIDDEN_SEMI_MARKOV_CHAIN]); + } + + for (j = 0;j < seq->nb_sequence;j++) { + delete [] likelihood[j]; + } + delete [] likelihood; + + delete seq; + delete simul_seq; + } + + if (out_file) { + out_file->close(); + delete out_file; + } + + delete hsmarkov; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between hidden semi-Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of hidden semi-Markov chains, + * \param[in] hsmarkov pointer on HiddenSemiMarkov objects, + * \param[in] nb_sequence number of generated sequences, + * \param[in] length sequence length, + * \param[in] path file path. + * + * \return DistanceMatrix object. 
+ */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* HiddenSemiMarkov::divergence_computation(StatError &error , ostream *os , + int nb_model , const HiddenSemiMarkov **hsmarkov , + int nb_sequence , int length , const string path) const + +{ + bool status = true; + int i; + FrequencyDistribution **length_distribution; + DistanceMatrix *dist_matrix; + + + dist_matrix = NULL; + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + if (status) { + length_distribution = new FrequencyDistribution*[nb_model]; + + length_distribution[0] = new FrequencyDistribution(length + 1); + + length_distribution[0]->nb_element = nb_sequence; + length_distribution[0]->offset = length; + length_distribution[0]->max = nb_sequence; + length_distribution[0]->mean = length; + length_distribution[0]->variance = 0.; + length_distribution[0]->frequency[length] = nb_sequence; + + for (i = 1;i < nb_model;i++) { + length_distribution[i] = new FrequencyDistribution(*length_distribution[0]); + } + + dist_matrix = divergence_computation(error , os , nb_model , hsmarkov , length_distribution , path); + + for (i = 0;i < nb_model;i++) { + delete length_distribution[i]; + } + delete [] length_distribution; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between hidden semi-Markov chains. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of hidden semi-Markov chains, + * \param[in] hsmarkov pointer on HiddenSemiMarkov objects, + * \param[in] nb_sequence number of generated sequences, + * \param[in] seq pointer on MarkovianSequences objects, + * \param[in] path file path. + * + * \return DistanceMatrix object. + */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* HiddenSemiMarkov::divergence_computation(StatError &error , ostream *os , + int nb_model , const HiddenSemiMarkov **hsmarkov , + int nb_sequence , const MarkovianSequences **seq , + const string path) const + +{ + int i; + FrequencyDistribution **length_distribution; + DistanceMatrix *dist_matrix; + + + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + dist_matrix = NULL; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + length_distribution = new FrequencyDistribution*[nb_model]; + for (i = 0;i < nb_model;i++) { + length_distribution[i] = seq[i]->length_distribution->frequency_scale(nb_sequence); + } + + dist_matrix = divergence_computation(error , os , nb_model , hsmarkov , length_distribution , path); + + for (i = 0;i < nb_model;i++) { + delete length_distribution[i]; + } + delete [] length_distribution; + } + + return dist_matrix; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/hvomc_algorithms1.cpp b/src/cpp/sequence_analysis/hvomc_algorithms1.cpp new file mode 100644 index 0000000..71de499 --- /dev/null +++ b/src/cpp/sequence_analysis/hvomc_algorithms1.cpp @@ -0,0 +1,2480 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + 
* $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include + +#include "stat_tool/stat_label.h" + +#include "stat_tool/distribution_reestimation.hpp" // problem compiler C++ Windows + +#include "hidden_variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood of a hidden variable-order Markov chain + * for sequences using the forward algorithm. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] posterior_probability pointer on the posterior probabilities of the most probable sequences, + * \param[in] index sequence index. + * + * \return log-likelihood. 
+ */ +/*--------------------------------------------------------------*/ + +double HiddenVariableOrderMarkov::likelihood_computation(const MarkovianSequences &seq , + double *posterior_probability , int index) const + +{ + int i , j , k , m; + int nb_value , **pioutput; + double likelihood = 0. , seq_likelihood , *forward , *auxiliary , norm , **proutput; + + + // checking of the compatibility of the model with the data + + if (nb_output_process == seq.nb_variable) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < seq.marginal_distribution[i]->nb_value) { + likelihood = D_INF; + break; + } + } + } + } + + else { + likelihood = D_INF; + } + + if (likelihood != D_INF) { + + // initializations + + forward = new double[nb_row]; + auxiliary = new double[nb_row]; + + pioutput = new int*[seq.nb_variable]; + proutput = new double*[seq.nb_variable]; + + for (i = 0;i < seq.nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < seq.nb_variable;j++) { + switch (seq.type[j]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j]; + break; + } + } + seq_likelihood = 0.; + + norm = 0.; + + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + forward[j] = initial[state[j][0]]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == 
ZERO_INFLATED_GAMMA)) && (seq.min_value[k] < seq.min_interval[k] / 2)) { + switch (seq.type[k]) { + case INT_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k]); + break; + case REAL_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k]); + break; + } + } + + else { + switch (seq.type[k]) { + case INT_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k] / 2 , *pioutput[k] + seq.min_interval[k] / 2); + break; + case REAL_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k] / 2 , *proutput[k] + seq.min_interval[k] / 2); + break; + } + } + } + } + + norm += forward[j]; + } + + else { + forward[j] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + forward[j] = initial[j]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k] < seq.min_interval[k] / 2)) { + switch (seq.type[k]) { + case INT_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k]); + break; + case REAL_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k]); + break; + } + } + + else { + switch 
(seq.type[k]) { + case INT_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k] / 2 , *pioutput[k] + seq.min_interval[k] / 2); + break; + case REAL_VALUE : + forward[j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k] / 2 , *proutput[k] + seq.min_interval[k] / 2); + break; + } + } + } + } + + norm += forward[j]; + } + + else { + forward[j] = 0.; + } + } + break; + } + } + + if (norm > 0.) { + for (j = 1;j < nb_row;j++) { + forward[j] /= norm; + } + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + break; + } + + for (j = 1;j < seq.length[i];j++) { + for (k = 0;k < seq.nb_variable;k++) { + switch (seq.type[k]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + norm = 0.; + + for (k = 1;k < nb_row;k++) { + auxiliary[k] = 0.; + for (m = 0;m < nb_memory[k];m++) { + auxiliary[k] += transition[previous[k][m]][state[k][0]] * forward[previous[k][m]]; + } + + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + auxiliary[k] *= categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + auxiliary[k] *= discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m] < seq.min_interval[m] / 2)) { + switch (seq.type[m]) { + case INT_VALUE : + auxiliary[k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m]); + break; + case REAL_VALUE : + auxiliary[k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m]); + break; + } + } + + else { + switch 
(seq.type[m]) { + case INT_VALUE : + auxiliary[k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] - seq.min_interval[m] / 2 , *pioutput[m] + seq.min_interval[m] / 2); + break; + case REAL_VALUE : + auxiliary[k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] - seq.min_interval[m] / 2 , *proutput[m] + seq.min_interval[m] / 2); + break; + } + } + } + } + + norm += auxiliary[k]; + } + + if (norm > 0.) { + for (k = 1;k < nb_row;k++) { + forward[k] = auxiliary[k] / norm; + } + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + break; + } + } + + if (seq_likelihood != D_INF) { + likelihood += seq_likelihood; + if (posterior_probability) { + posterior_probability[i] = exp(posterior_probability[i] - seq_likelihood); + } + } + + else { + likelihood = D_INF; + break; + } + } + } + + delete [] forward; + delete [] auxiliary; + + delete [] pioutput; + delete [] proutput; + } + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a hidden variable-order Markov chain using the EM algorithm. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] ihmarkov initial hidden variable-order Markov chain, + * \param[in] global_initial_transition type of estimation of the initial transition probabilities (ordinary process case), + * \param[in] common_dispersion flag common dispersion parameter (continuous observation processes), + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] state_sequence flag on the computation of the restored state sequences, + * \param[in] nb_iter number of iterations. + * + * \return HiddenVariableOrderMarkov object. 
+ */
+/*--------------------------------------------------------------*/
+
+HiddenVariableOrderMarkov* MarkovianSequences::hidden_variable_order_markov_estimation(StatError &error , ostream *os ,
+                                                                                       const HiddenVariableOrderMarkov &ihmarkov ,
+                                                                                       bool global_initial_transition ,
+                                                                                       bool common_dispersion ,
+                                                                                       bool counting_flag , bool state_sequence ,
+                                                                                       int nb_iter) const
+
+{
+  bool status;
+  int i , j , k , m;
+  int nb_terminal , max_nb_value , iter , **pioutput;
+  double likelihood = D_INF , previous_likelihood , observation_likelihood , **forward , norm ,
+         **predicted , buff , *backward , *auxiliary , ***state_sequence_count , diff , variance ,
+         **mean_direction , global_mean_direction , concentration , **proutput;
+  Distribution *weight;
+  ChainReestimation *chain_reestim;
+  Reestimation ***observation_reestim;
+  FrequencyDistribution *hobservation;
+  HiddenVariableOrderMarkov *hmarkov;
+  VariableOrderMarkovData *seq;
+  // Iterative estimation: each iteration runs a forward and a backward recurrence per sequence, accumulates reestimation quantities, then reestimates the parameters; NULL is returned (with error updated) when a check fails or the likelihood becomes D_INF.
+
+  hmarkov = NULL;
+  error.init();
+
+  // test number of values for each variable: at least one variable must have max_value > min_value
+
+  status = false;
+  for (i = 0;i < nb_variable;i++) {
+    if (max_value[i] > min_value[i]) {
+      status = true;
+      break;
+    }
+  }
+
+  if (!status) {
+    error.update(STAT_error[STATR_VARIABLE_NB_VALUE]);
+  }
+
+  for (i = 0;i < nb_variable;i++) {
+    if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) {  // any other variable type is rejected
+      status = false;
+      ostringstream error_message , correction_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      correction_message << STAT_variable_word[INT_VALUE] << " or "
+                         << STAT_variable_word[REAL_VALUE];
+      error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str());
+    }
+  }
+
+  if (ihmarkov.nb_output_process != nb_variable) {  // the initial model must define one output process per variable
+    status = false;
+    error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]);
+  }
+
+  else {
+    for (i = 0;i < nb_variable;i++) {
+      if ((ihmarkov.categorical_process[i]) || (ihmarkov.discrete_parametric_process[i])) {  // checks specific to discrete observation processes
+        if (type[i] == REAL_VALUE) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                        << STAT_error[STATR_VARIABLE_TYPE];
+          error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]);
+        }
+
+        else {
+          if (min_value[i] < 0) {
+            status = false;
+            ostringstream error_message;
+            error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                          << STAT_error[STATR_POSITIVE_MIN_VALUE];
+            error.update((error_message.str()).c_str());
+          }
+
+          if (!marginal_distribution[i]) {
+            status = false;
+            ostringstream error_message;
+            error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                          << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION];
+            error.update((error_message.str()).c_str());
+          }
+
+          else {
+            if (((ihmarkov.categorical_process[i]) &&
+                 (ihmarkov.categorical_process[i]->nb_value != marginal_distribution[i]->nb_value)) ||
+                ((ihmarkov.discrete_parametric_process[i]) &&
+                 (ihmarkov.discrete_parametric_process[i]->nb_value < marginal_distribution[i]->nb_value))) {
+              status = false;
+              ostringstream error_message;
+              error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": "
+                            << STAT_error[STATR_NB_OUTPUT];
+              error.update((error_message.str()).c_str());
+            }
+
+            else if ((ihmarkov.categorical_process[i]) && (!characteristics[i])) {
+              for (j = 0;j < marginal_distribution[i]->nb_value;j++) {
+                if (marginal_distribution[i]->frequency[j] == 0) {  // one error is reported per value with a null frequency
+                  status = false;
+                  ostringstream error_message;
+                  error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": "
+                                << STAT_error[STATR_MISSING_VALUE] << " " << j;
+                  error.update((error_message.str()).c_str());
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if ((nb_iter != I_DEFAULT) && (nb_iter < 1)) {
+    status = false;
+    error.update(STAT_error[STATR_NB_ITERATION]);
+  }
+
+  if (status) {
+
+    // construction of the hidden variable-order Markov chain (working copy of ihmarkov)
+
+    hmarkov = new HiddenVariableOrderMarkov(ihmarkov , false);
+
+    if (hmarkov->type == EQUILIBRIUM) {
+      nb_terminal = (hmarkov->nb_row - 1) * (hmarkov->nb_state - 1) / hmarkov->nb_state + 1;
+
+      for (i = 1;i < hmarkov->nb_row;i++) {
+        if (!hmarkov->child[i]) {
+          hmarkov->initial[i] = 1. / (double)nb_terminal;  // same initial probability for every memory without child
+        }
+        else {
+          hmarkov->initial[i] = 0.;
+        }
+      }
+    }
+
+    if (common_dispersion) {
+      for (i = 0;i < hmarkov->nb_output_process;i++) {
+        if (hmarkov->continuous_parametric_process[i]) {
+          hmarkov->continuous_parametric_process[i]->tied_dispersion = true;
+        }
+      }
+    }
+
+# ifdef DEBUG
+    cout << *hmarkov;
+# endif
+
+    // construction of the data structures of the algorithm
+
+    forward = new double*[max_length];  // max_length x nb_row
+    for (i = 0;i < max_length;i++) {
+      forward[i] = new double[hmarkov->nb_row];
+    }
+
+    predicted = new double*[max_length];  // max_length x nb_row
+    for (i = 0;i < max_length;i++) {
+      predicted[i] = new double[hmarkov->nb_row];
+    }
+
+    backward = new double[hmarkov->nb_row];
+
+    auxiliary = new double[hmarkov->nb_row];
+
+    chain_reestim = new ChainReestimation(hmarkov->type , hmarkov->nb_state , hmarkov->nb_row);
+
+    observation_reestim = new Reestimation**[hmarkov->nb_output_process];
+    for (i = 0;i < hmarkov->nb_output_process;i++) {
+      if (marginal_distribution[i]) {
+        observation_reestim[i] = new Reestimation*[hmarkov->nb_state];
+        for (j = 0;j < hmarkov->nb_state;j++) {
+          observation_reestim[i][j] = new Reestimation(marginal_distribution[i]->nb_value);
+        }
+      }
+
+      else {
+        observation_reestim[i] = NULL;
+      }
+    }
+
+    max_nb_value = 0;
+    for (i = 0;i < hmarkov->nb_output_process;i++) {
+      if ((hmarkov->discrete_parametric_process[i]) &&
+          (max_nb_value < marginal_distribution[i]->nb_value)) {
+        max_nb_value = marginal_distribution[i]->nb_value;
+      }
+    }
+
+    if (max_nb_value > 0) {  // hobservation is only needed when there is a discrete parametric observation process
+      hobservation = new FrequencyDistribution(max_nb_value);
+    }
+    else {
+      hobservation = NULL;
+    }
+
+    for (i = 0;i < hmarkov->nb_output_process;i++) {
+      if (!marginal_distribution[i]) {
+        break;
+      }
+    }
+
+    if (i < hmarkov->nb_output_process) {  // at least one variable has no marginal frequency distribution
+      state_sequence_count = new double**[nb_sequence];
+      for (i = 0;i < nb_sequence;i++) {
+        state_sequence_count[i] = new double*[length[i]];
+        for (j = 0;j < length[i];j++) {
+          state_sequence_count[i][j] = new double[hmarkov->nb_state];
+        }
+      }
+    }
+    else {
+      state_sequence_count = NULL;
+    }
+
+    for (i = 0;i < hmarkov->nb_output_process;i++) {
+      if ((hmarkov->continuous_parametric_process[i]) &&
+          (hmarkov->continuous_parametric_process[i]->ident == VON_MISES)) {
+        break;
+      }
+    }
+
+    if (i < hmarkov->nb_output_process) {  // at least one von Mises observation process
+      mean_direction = new double*[hmarkov->nb_state];
+      for (i = 0;i < hmarkov->nb_state;i++) {
+        mean_direction[i] = new double[4];  // elements [2] and [3] are read below (concentration input and location)
+      }
+    }
+    else {
+      mean_direction = NULL;
+    }
+
+    pioutput = new int*[nb_variable];
+    proutput = new double*[nb_variable];
+
+    iter = 0;
+    do {
+      iter++;
+      previous_likelihood = likelihood;  // kept for the relative-increase stopping test of the loop
+      likelihood = 0.;
+
+      // initialization of the reestimation quantities
+
+      chain_reestim->init();
+
+      for (i = 0;i < hmarkov->nb_output_process;i++) {
+        if (observation_reestim[i]) {
+          for (j = 0;j < hmarkov->nb_state;j++) {
+            for (k = 0;k < marginal_distribution[i]->nb_value;k++) {
+              observation_reestim[i][j]->frequency[k] = 0.;
+            }
+          }
+        }
+      }
+
+      if (state_sequence_count) {
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 0;j < length[i];j++) {
+            for (k = 0;k < hmarkov->nb_state;k++) {
+              state_sequence_count[i][j][k] = 0.;
+            }
+          }
+        }
+      }
+
+      for (i = 0;i < nb_sequence;i++) {
+
+        // forward recurrence (pioutput/proutput are set to the first value of each variable of sequence i)
+
+        for (j = 0;j < nb_variable;j++) {
+          switch (type[j]) {
+          case INT_VALUE :
+            pioutput[j] = int_sequence[i][j];
+            break;
+          case REAL_VALUE :
+            proutput[j] = real_sequence[i][j];
+            break;
+          }
+        }
+
+        norm = 0.;
+
+        switch (hmarkov->type) {
+
+        case ORDINARY : {
+          for (j = 1;j < hmarkov->nb_row;j++) {
+            if (hmarkov->order[j] == 1) {  // only the memories of order 1 get a non-null initial value
+              forward[0][j] = hmarkov->initial[hmarkov->state[j][0]];
+
+              for (k = 0;k < hmarkov->nb_output_process;k++) {
+                if (hmarkov->categorical_process[k]) {
+                  forward[0][j] *= hmarkov->categorical_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]];
+                }
+
+                else if (hmarkov->discrete_parametric_process[k]) {
+                  forward[0][j] *= hmarkov->discrete_parametric_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]];
+                }
+
+                else {
+                  if (((hmarkov->continuous_parametric_process[k]->ident == GAMMA) ||
+                       (hmarkov->continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (min_value[k] < min_interval[k] / 2)) {  // interval [x , x + min_interval] instead of the centered one
+                    switch (type[k]) {
+                    case INT_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + min_interval[k]);
+                      break;
+                    case REAL_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + min_interval[k]);
+                      break;
+                    }
+                  }
+
+                  else {
+                    switch (type[k]) {
+                    case INT_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] - min_interval[k] / 2 , *pioutput[k] + min_interval[k] / 2);
+                      break;
+                    case REAL_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] - min_interval[k] / 2 , *proutput[k] + min_interval[k] / 2);
+                      break;
+                    }
+                  }
+                }
+              }
+
+              norm += forward[0][j];
+            }
+
+            else {
+              forward[0][j] = 0.;
+            }
+          }
+          break;
+        }
+
+        case EQUILIBRIUM : {
+          for (j = 1;j < hmarkov->nb_row;j++) {
+            if (!(hmarkov->child[j])) {  // only the memories without child get a non-null initial value
+              forward[0][j] = hmarkov->initial[j];
+
+              for (k = 0;k < hmarkov->nb_output_process;k++) {
+                if (hmarkov->categorical_process[k]) {
+                  forward[0][j] *= hmarkov->categorical_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]];
+                }
+
+                else if (hmarkov->discrete_parametric_process[k]) {
+                  forward[0][j] *= hmarkov->discrete_parametric_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]];
+                }
+
+                else {
+                  if (((hmarkov->continuous_parametric_process[k]->ident == GAMMA) ||
+                       (hmarkov->continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (min_value[k] < min_interval[k] / 2)) {
+                    switch (type[k]) {
+                    case INT_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + min_interval[k]);
+                      break;
+                    case REAL_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + min_interval[k]);
+                      break;
+                    }
+                  }
+
+                  else {
+                    switch (type[k]) {
+                    case INT_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] - min_interval[k] / 2 , *pioutput[k] + min_interval[k] / 2);
+                      break;
+                    case REAL_VALUE :
+                      forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] - min_interval[k] / 2 , *proutput[k] + min_interval[k] / 2);
+                      break;
+                    }
+                  }
+                }
+              }
+
+              norm += forward[0][j];
+            }
+
+            else {
+              forward[0][j] = 0.;
+            }
+          }
+          break;
+        }
+        }
+
+        if (norm > 0.) {
+          for (j = 1;j < hmarkov->nb_row;j++) {
+            forward[0][j] /= norm;
+          }
+
+          likelihood += log(norm);  // log of the normalizing constant accumulated into the likelihood
+        }
+
+        else {
+          likelihood = D_INF;  // null normalizing constant: the estimation fails
+          break;
+        }
+
+        for (j = 1;j < length[i];j++) {
+          for (k = 0;k < nb_variable;k++) {
+            switch (type[k]) {
+            case INT_VALUE :
+              pioutput[k]++;  // next index of sequence i
+              break;
+            case REAL_VALUE :
+              proutput[k]++;
+              break;
+            }
+          }
+          norm = 0.;
+
+          for (k = 1;k < hmarkov->nb_row;k++) {
+            forward[j][k] = 0.;
+            for (m = 0;m < hmarkov->nb_memory[k];m++) {
+              forward[j][k] += hmarkov->transition[hmarkov->previous[k][m]][hmarkov->state[k][0]] *
+                               forward[j - 1][hmarkov->previous[k][m]];
+            }
+            predicted[j][k] = forward[j][k];  // value before the observation probabilities are applied (reused in the backward recurrence)
+
+            for (m = 0;m < hmarkov->nb_output_process;m++) {
+              if (hmarkov->categorical_process[m]) {
+                forward[j][k] *= hmarkov->categorical_process[m]->observation[hmarkov->state[k][0]]->mass[*pioutput[m]];
+              }
+
+              else if (hmarkov->discrete_parametric_process[m]) {
+                forward[j][k] *= hmarkov->discrete_parametric_process[m]->observation[hmarkov->state[k][0]]->mass[*pioutput[m]];
+              }
+
+              else {
+                if (((hmarkov->continuous_parametric_process[m]->ident == GAMMA) ||
+                     (hmarkov->continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (min_value[m] < min_interval[m] / 2)) {
+                  switch (type[m]) {
+                  case INT_VALUE :
+                    forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + min_interval[m]);
+                    break;
+                  case REAL_VALUE :
+                    forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + min_interval[m]);
+                    break;
+                  }
+                }
+
+                else {
+                  switch (type[m]) {
+                  case INT_VALUE :
+                    forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*pioutput[m] - min_interval[m] / 2 , *pioutput[m] + min_interval[m] / 2);
+                    break;
+                  case REAL_VALUE :
+                    forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*proutput[m] - min_interval[m] / 2 , *proutput[m] + min_interval[m] / 2);
+                    break;
+                  }
+                }
+              }
+            }
+
+            norm += forward[j][k];
+          }
+
+          if (norm > 0.) {
+            for (k = 1;k < hmarkov->nb_row;k++) {
+              forward[j][k] /= norm;
+            }
+            likelihood += log(norm);
+          }
+
+          else {
+            likelihood = D_INF;
+            break;
+          }
+        }
+
+        if (likelihood == D_INF) {  // propagates the failure out of the loop on the sequences
+          break;
+        }
+
+        // backward recurrence (starts from the last index, where backward equals the normalized forward values)
+
+        j = length[i] - 1;
+        for (k = 1;k < hmarkov->nb_row;k++) {
+          backward[k] = forward[j][k];
+
+          // accumulation of the reestimation quantities of the observation distributions
+
+          for (m = 0;m < hmarkov->nb_output_process;m++) {
+            if (observation_reestim[m]) {
+              observation_reestim[m][hmarkov->state[k][0]]->frequency[*pioutput[m]] += backward[k];
+            }
+          }
+
+          if (state_sequence_count) {
+            state_sequence_count[i][j][hmarkov->state[k][0]] += backward[k];
+          }
+        }
+
+        for (j = length[i] - 2;j >= 0;j--) {
+          for (k = 0;k < nb_variable;k++) {
+            if (type[k] == INT_VALUE) {  // only *pioutput is read in the backward recurrence, proutput is left untouched
+              pioutput[k]--;
+            }
+          }
+
+          for (k = 1;k < hmarkov->nb_row;k++) {
+            if (predicted[j + 1][k] > 0.) {
+              auxiliary[k] = backward[k] / predicted[j + 1][k];
+            }
+            else {
+              auxiliary[k] = 0.;  // avoids a division by zero
+            }
+          }
+
+          for (k = 1;k < hmarkov->nb_row;k++) {
+            backward[k] = 0.;
+
+            if (hmarkov->next[k]) {
+              for (m = 0;m < hmarkov->nb_state;m++) {
+                buff = auxiliary[hmarkov->next[k][m]] * hmarkov->transition[k][m] * forward[j][k];
+                backward[k] += buff;
+
+                // accumulation of the reestimation quantities of the transition probabilities
+
+                chain_reestim->transition[k][m] += buff;
+              }
+
+              // accumulation of the reestimation quantities of the observation distributions
+
+              for (m = 0;m < hmarkov->nb_output_process;m++) {
+                if (observation_reestim[m]) {
+                  observation_reestim[m][hmarkov->state[k][0]]->frequency[*pioutput[m]] += backward[k];
+                }
+              }
+
+              if (state_sequence_count) {
+                state_sequence_count[i][j][hmarkov->state[k][0]] += backward[k];
+              }
+            }
+          }
+
+# ifdef DEBUG
+/*          cout << j << " : ";
+          sum = 0.;
+          for (k = 1;k < hmarkov->nb_row;k++) {
+            sum += backward[k];
+            cout << backward[k] << " ";
+          }
+          cout << "| " << sum << endl; */
+# endif
+
+        }
+
+        // accumulation of the reestimation quantities of the initial probabilities (backward holds the values for index 0 here)
+
+        if (hmarkov->type == ORDINARY) {
+          for (j = 1;j < hmarkov->nb_row;j++) {
+            if (hmarkov->order[j] == 1) {
+              chain_reestim->initial[hmarkov->state[j][0]] += backward[j];
+            }
+          }
+        }
+      }
+
+      if (likelihood != D_INF) {
+
+        // reestimation of the initial probabilities
+
+        if (hmarkov->type == ORDINARY) {
+          reestimation(hmarkov->nb_state , chain_reestim->initial ,
+                       hmarkov->initial , MIN_PROBABILITY , false);
+        }
+
+        // reestimation of the transition probabilities (counts of the COMPLETION memories are first added to their parent, from the last row up)
+
+        for (i = hmarkov->nb_row - 1;i >= 1;i--) {
+          if (hmarkov->memo_type[i] == COMPLETION) {
+/*          if ((hmarkov->memo_type[i] == COMPLETION) || ((hmarkov->type == ORDINARY) &&
+               (global_initial_transition) && (hmarkov->order[i] > 1))) { */
+            for (j = 0;j < hmarkov->nb_state;j++) {
+              chain_reestim->transition[hmarkov->parent[i]][j] += chain_reestim->transition[i][j];
+            }
+          }
+        }
+
+        for (i = 1;i < hmarkov->nb_row;i++) {
+          if ((hmarkov->memo_type[i] == TERMINAL) || ((hmarkov->type == ORDINARY) &&
+               (hmarkov->memo_type[i] == NON_TERMINAL))) {
+            reestimation(hmarkov->nb_state , chain_reestim->transition[i] ,
+                         hmarkov->transition[i] , MIN_PROBABILITY , false);
+          }
+          else if (hmarkov->memo_type[i] == COMPLETION) {  // a COMPLETION memory copies the transition distribution of its parent
+            for (j = 0;j < hmarkov->nb_state;j++) {
+              hmarkov->transition[i][j] = hmarkov->transition[hmarkov->parent[i]][j];
+            }
+          }
+        }
+
+        if (hmarkov->type == EQUILIBRIUM) {
+          hmarkov->initial_probability_computation();
+        }
+
+        // reestimation of the observation distributions
+
+        for (i = 0;i < hmarkov->nb_output_process;i++) {
+          if (hmarkov->categorical_process[i]) {
+            for (j = 0;j < hmarkov->nb_state;j++) {
+              reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency ,
+                           hmarkov->categorical_process[i]->observation[j]->mass ,
+                           MIN_PROBABILITY , false);
+            }
+          }
+
+          else if (observation_reestim[i]) {  // parametric process estimated from the per-state reestimation quantities
+            for (j = 0;j < hmarkov->nb_state;j++) {
+              observation_reestim[i][j]->nb_value_computation();
+              observation_reestim[i][j]->offset_computation();
+              observation_reestim[i][j]->nb_element_computation();
+              observation_reestim[i][j]->max_computation();
+              if ((hmarkov->discrete_parametric_process[i]) ||
+                  (hmarkov->continuous_parametric_process[i]->ident != ZERO_INFLATED_GAMMA)) {
+                observation_reestim[i][j]->mean_computation();
+                observation_reestim[i][j]->variance_computation(true);
+// observation_reestim[i][j]->variance_computation();
+              }
+            }
+
+            if (hmarkov->discrete_parametric_process[i]) {
+              for (j = 0;j < hmarkov->nb_state;j++) {
+                hobservation->update(observation_reestim[i][j] ,
+                                     MAX((int)(observation_reestim[i][j]->nb_element *
+                                               MAX(sqrt(observation_reestim[i][j]->variance) , 1.) * OBSERVATION_COEFF) , MIN_NB_ELEMENT));
+                observation_likelihood = hobservation->Reestimation::type_parametric_estimation(hmarkov->discrete_parametric_process[i]->observation[j] ,
+                                                                                                0 , true , OBSERVATION_THRESHOLD);
+
+                if (observation_likelihood != D_INF) {
+                  hmarkov->discrete_parametric_process[i]->observation[j]->computation(marginal_distribution[i]->nb_value ,
+                                                                                       OBSERVATION_THRESHOLD);
+
+                  if (hmarkov->discrete_parametric_process[i]->observation[j]->ident == BINOMIAL) {  // masses from the distribution nb_value up to the marginal nb_value are set to 0
+                    for (k = hmarkov->discrete_parametric_process[i]->observation[j]->nb_value;k < marginal_distribution[i]->nb_value;k++) {
+                      hmarkov->discrete_parametric_process[i]->observation[j]->mass[k] = 0.;
+                    }
+                  }
+                }
+              }
+            }
+
+            else {
+              switch (hmarkov->continuous_parametric_process[i]->ident) {
+
+              case GAMMA : {
+                for (j = 0;j < hmarkov->nb_state;j++) {
+                  observation_reestim[i][j]->gamma_estimation(hmarkov->continuous_parametric_process[i]->observation[j] , iter);
+                }
+                break;
+              }
+
+              case ZERO_INFLATED_GAMMA : {
+                for (j = 0;j < hmarkov->nb_state;j++) {
+                  observation_reestim[i][j]->zero_inflated_gamma_estimation(hmarkov->continuous_parametric_process[i]->observation[j] , iter);
+                }
+                break;
+              }
+
+              case GAUSSIAN : {
+                for (j = 0;j < hmarkov->nb_state;j++) {
+                  hmarkov->continuous_parametric_process[i]->observation[j]->location = observation_reestim[i][j]->mean;
+                }
+
+                if (common_dispersion) {  // a single variance pooled over all the states
+                  variance = 0.;
+                  buff = 0.;
+
+                  for (j = 0;j < hmarkov->nb_state;j++) {
+                    for (k = observation_reestim[i][j]->offset;k < observation_reestim[i][j]->nb_value;k++) {
+                      diff = k - observation_reestim[i][j]->mean;
+                      variance += observation_reestim[i][j]->frequency[k] * diff * diff;
+                    }
+
+                    buff += observation_reestim[i][j]->nb_element;
+                  }
+
+                  variance /= buff;
+// variance /= (buff - 1);
+
+                  for (j = 0;j < hmarkov->nb_state;j++) {
+                    hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(variance);
+                  }
+                }
+
+                else {
+                  for (j = 0;j < hmarkov->nb_state;j++) {
+                    hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(observation_reestim[i][j]->variance);
+                    if (hmarkov->continuous_parametric_process[i]->observation[j]->dispersion /
+                        hmarkov->continuous_parametric_process[i]->observation[j]->location < GAUSSIAN_MIN_VARIATION_COEFF) {  // the ratio dispersion / location is bounded below
+                      hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = hmarkov->continuous_parametric_process[i]->observation[j]->location * GAUSSIAN_MIN_VARIATION_COEFF;
+                    }
+                  }
+                }
+
+                break;
+              }
+
+              case VON_MISES : {
+                for (j = 0;j < hmarkov->nb_state;j++) {
+                  observation_reestim[i][j]->mean_direction_computation(mean_direction[j]);
+                  hmarkov->continuous_parametric_process[i]->observation[j]->location = mean_direction[j][3];
+                }
+
+                if (common_dispersion) {  // a single concentration computed from the nb_element-weighted average of mean_direction[j][2]
+                  global_mean_direction = 0.;
+                  buff = 0.;
+
+                  for (j = 0;j < hmarkov->nb_state;j++) {
+                    global_mean_direction += observation_reestim[i][j]->nb_element * mean_direction[j][2];
+                    buff += observation_reestim[i][j]->nb_element;
+                  }
+                  concentration = von_mises_concentration_computation(global_mean_direction / buff);
+
+                  for (j = 0;j < hmarkov->nb_state;j++) {
+                    hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = concentration;
+                  }
+                }
+
+                else {
+                  for (j = 0;j < hmarkov->nb_state;j++) {
+                    hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = von_mises_concentration_computation(mean_direction[j][2]);
+                  }
+                }
+                break;
+              }
+              }
+            }
+          }
+
+          else {  // no marginal frequency distribution: estimation from state_sequence_count
+            switch (hmarkov->continuous_parametric_process[i]->ident) {
+            case GAMMA :
+              gamma_estimation(state_sequence_count , i ,
+                               hmarkov->continuous_parametric_process[i] , iter);
+              break;
+            case ZERO_INFLATED_GAMMA :
+              zero_inflated_gamma_estimation(state_sequence_count , i ,
+                                             hmarkov->continuous_parametric_process[i] , iter);
+              break;
+            case GAUSSIAN :
+              gaussian_estimation(state_sequence_count , i ,
+                                  hmarkov->continuous_parametric_process[i]);
+              break;
+            case VON_MISES :
+              von_mises_estimation(state_sequence_count , i ,
+                                   hmarkov->continuous_parametric_process[i]);
+              break;
+            }
+          }
+        }
+      }
+
+      if (os) {
+        *os << STAT_label[STATL_ITERATION] << " " << iter << " "
+            << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << likelihood << endl;
+      }
+
+# ifdef DEBUG
+      if (iter % 5 == 0) {
+        cout << *hmarkov;
+      }
+# endif
+      // the loop stops on failure (D_INF), after nb_iter iterations when nb_iter is given, otherwise after VARIABLE_ORDER_MARKOV_NB_ITER iterations or when the relative likelihood increase is not above VARIABLE_ORDER_MARKOV_LIKELIHOOD_DIFF
+    }
+    while ((likelihood != D_INF) && (((nb_iter == I_DEFAULT) && (iter < VARIABLE_ORDER_MARKOV_NB_ITER) &&
+             ((likelihood - previous_likelihood) / -likelihood > VARIABLE_ORDER_MARKOV_LIKELIHOOD_DIFF)) ||
+            ((nb_iter != I_DEFAULT) && (iter < nb_iter))));
+
+    if (likelihood != D_INF) {
+      if (os) {
+        *os << "\n" << iter << " " << STAT_label[STATL_ITERATIONS] << endl;
+      }
+
+      // reestimation of the initial probabilities (NOTE(review): same calls as inside the loop except for the last argument, true instead of false - confirm its meaning against reestimation())
+
+      if (hmarkov->type == ORDINARY) {
+        reestimation(hmarkov->nb_state , chain_reestim->initial ,
+                     hmarkov->initial , MIN_PROBABILITY , true);
+      }
+
+      // reestimation of the transition probabilities
+
+      if ((hmarkov->type == ORDINARY) && (global_initial_transition)) {  // counts of the non-COMPLETION memories of order > 1 are added to their parent
+        for (i = hmarkov->nb_row - 1;i >= 1;i--) {
+          if ((hmarkov->memo_type[i] != COMPLETION) && (hmarkov->order[i] > 1)) {
+            for (j = 0;j < hmarkov->nb_state;j++) {
+              chain_reestim->transition[hmarkov->parent[i]][j] += chain_reestim->transition[i][j];
+            }
+          }
+        }
+      }
+
+      for (i = 1;i < hmarkov->nb_row;i++) {
+        if ((hmarkov->memo_type[i] == TERMINAL) || ((hmarkov->type == ORDINARY) &&
+             (hmarkov->memo_type[i] == NON_TERMINAL))) {
+          reestimation(hmarkov->nb_state , chain_reestim->transition[i] ,
+                       hmarkov->transition[i] , MIN_PROBABILITY , true);
+        }
+        else if (hmarkov->memo_type[i] == COMPLETION) {
+          for (j = 0;j < hmarkov->nb_state;j++) {
+            hmarkov->transition[i][j] = hmarkov->transition[hmarkov->parent[i]][j];
+          }
+        }
+      }
+
+      if (hmarkov->type == EQUILIBRIUM) {
+        hmarkov->initial_probability_computation();
+      }
+
+      // reestimation of the categorical observation distributions
+
+      for (i = 0;i < hmarkov->nb_output_process;i++) {
+        if (hmarkov->categorical_process[i]) {
+          for (j = 0;j < hmarkov->nb_state;j++) {
+            reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency ,
+                         hmarkov->categorical_process[i]->observation[j]->mass ,
+                         MIN_PROBABILITY , true);
+          }
+        }
+
+        else if (hmarkov->discrete_parametric_process[i]) {
+          hmarkov->discrete_parametric_process[i]->nb_value_computation();
+        }
+      }
+    }
+
+    // destruction of the data structures of the algorithm (done whether the estimation succeeded or not)
+
+    for (i = 0;i < max_length;i++) {
+      delete [] forward[i];
+    }
+    delete [] forward;
+
+    for (i = 0;i < max_length;i++) {
+      delete [] predicted[i];
+    }
+    delete [] predicted;
+
+    delete [] backward;
+
+    delete [] auxiliary;
+
+    delete chain_reestim;
+
+    for (i = 0;i < hmarkov->nb_output_process;i++) {
+      if (observation_reestim[i]) {
+        for (j = 0;j < hmarkov->nb_state;j++) {
+          delete observation_reestim[i][j];
+        }
+        delete [] observation_reestim[i];
+      }
+    }
+    delete [] observation_reestim;
+
+    delete hobservation;
+
+    if (state_sequence_count) {
+      for (i = 0;i < nb_sequence;i++) {
+        for (j = 0;j < length[i];j++) {
+          delete [] state_sequence_count[i][j];
+        }
+        delete [] state_sequence_count[i];
+      }
+      delete [] state_sequence_count;
+    }
+
+    if (mean_direction) {
+      for (i = 0;i < hmarkov->nb_state;i++) {
+        delete [] mean_direction[i];
+      }
+      delete [] mean_direction;
+    }
+
+    delete [] pioutput;
+    delete [] proutput;
+
+    if (likelihood == D_INF) {  // failure: the model is discarded and NULL is returned
+      delete hmarkov;
+      hmarkov = NULL;
+      error.update(STAT_error[STATR_ESTIMATION_FAILURE]);
+    }
+
+    else {
+      if (state_sequence) {
+        hmarkov->markov_data = new VariableOrderMarkovData(*this , ADD_STATE_VARIABLE ,
+                                                           (hmarkov->type == EQUILIBRIUM ? true : false));
+        seq = hmarkov->markov_data;
+
+        for (i = 0;i < hmarkov->nb_output_process;i++) {
+          if (((hmarkov->discrete_parametric_process[i]) || (hmarkov->continuous_parametric_process[i])) &&
+              (seq->characteristics[i + 1])) {  // NOTE(review): offset of 1 presumably because the added state variable is stored first - confirm
+            delete seq->characteristics[i + 1];
+            seq->characteristics[i + 1] = NULL;
+          }
+        }
+
+        hmarkov->forward_backward(*seq);
+
+        hmarkov->create_cumul();
+        hmarkov->log_computation();
+        hmarkov->viterbi(*seq);
+        hmarkov->remove_cumul();
+
+        seq->min_value_computation(0);
+        seq->max_value_computation(0);
+        seq->build_marginal_frequency_distribution(0);
+        seq->build_characteristic(0);
+
+        seq->build_transition_count(*hmarkov);
+        seq->build_observation_frequency_distribution(hmarkov->nb_state);
+        seq->build_observation_histogram(hmarkov->nb_state);
+
+        // computation of the mixtures of observation distributions (weights deduced from the restoration)
+
+        weight = seq->weight_computation();
+
+        for (i = 0;i < hmarkov->nb_output_process;i++) {
+          if (hmarkov->categorical_process[i]) {
+            hmarkov->categorical_process[i]->restoration_weight = new Distribution(*weight);
+            hmarkov->categorical_process[i]->restoration_mixture = hmarkov->categorical_process[i]->mixture_computation(hmarkov->categorical_process[i]->restoration_weight);
+          }
+
+          else if (hmarkov->discrete_parametric_process[i]) {
+            for (j = 0;j < hmarkov->nb_state;j++) {
+              hmarkov->discrete_parametric_process[i]->observation[j]->cumul_computation();
+            }
+
+            hmarkov->discrete_parametric_process[i]->restoration_weight = new Distribution(*weight);
+            hmarkov->discrete_parametric_process[i]->restoration_mixture = hmarkov->discrete_parametric_process[i]->mixture_computation(hmarkov->discrete_parametric_process[i]->restoration_weight);
+          }
+
+          else if (hmarkov->continuous_parametric_process[i]) {
+            hmarkov->continuous_parametric_process[i]->restoration_weight = new Distribution(*weight);
+          }
+        }
+
+        delete weight;  // each process holds its own copy
+
+        if (os) {
+          *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << ": " << seq->restoration_likelihood;
+
+          for (i = 0;i < nb_variable;i++) {
+            if (type[i] == REAL_VALUE) {
+              break;
+            }
+          }
+          if (i == nb_variable) {  // no real-valued variable
+            *os << " | " << hmarkov->VariableOrderMarkov::likelihood_computation(*seq);
+          }
+          *os << endl;
+        }
+      }
+
+      else {
+        hmarkov->markov_data = new VariableOrderMarkovData(*this , SEQUENCE_COPY ,
+                                                           (hmarkov->type == EQUILIBRIUM ? true : false));
+        seq = hmarkov->markov_data;
+        if (seq->type[0] == STATE) {
+          seq->state_variable_init(INT_VALUE);
+        }
+
+        for (i = 0;i < hmarkov->nb_output_process;i++) {
+          if (((hmarkov->discrete_parametric_process[i]) || (hmarkov->continuous_parametric_process[i])) &&
+              (seq->characteristics[i])) {
+            delete seq->characteristics[i];
+            seq->characteristics[i] = NULL;
+          }
+        }
+      }
+
+      for (i = 0;i < hmarkov->nb_output_process;i++) {
+        if (hmarkov->categorical_process[i]) {
+          for (j = 0;j < hmarkov->nb_state;j++) {
+            hmarkov->categorical_process[i]->observation[j]->cumul_computation();
+
+            hmarkov->categorical_process[i]->observation[j]->max_computation();
+// hmarkov->categorical_process[i]->observation[j]->mean_computation();
+// hmarkov->categorical_process[i]->observation[j]->variance_computation();
+          }
+        }
+      }
+
+      // computation of the log-likelihood and the characteristic distributions of the model
+
+      seq->likelihood = hmarkov->likelihood_computation(*this , seq->posterior_probability);
+
+# ifdef DEBUG
+// cout << *hmarkov;
+      cout << "iteration " << iter << " "
+           << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << seq->likelihood << endl;
+# endif
+
+      if ((os) && (state_sequence) && (seq->nb_sequence <= POSTERIOR_PROBABILITY_NB_SEQUENCE)) {
+        *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] << endl;
+        for (i = 0;i < seq->nb_sequence;i++) {
+          *os << SEQ_label[SEQL_SEQUENCE] << " " << seq->identifier[i] << ": "
+              << seq->posterior_probability[i] << endl;
+        }
+      }
+
+      hmarkov->component_computation();
+      hmarkov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false);
+
+      // computation of the mixtures of observation distributions (theoretical weights)
+
+      switch (hmarkov->type) {
+
+      case ORDINARY : {
+        weight = hmarkov->state_process->weight_computation();
+        break;
+      }
+
+      case EQUILIBRIUM : {
+        weight = new Distribution(hmarkov->nb_state);
+
+        for (i = 0;i < hmarkov->nb_state;i++) {
+          weight->mass[i] = 0.;
+        }
+        for (i = 1;i < hmarkov->nb_row;i++) {
+          if ((hmarkov->memo_type[i] == TERMINAL) || (hmarkov->memo_type[i] == COMPLETION)) {
+            weight->mass[hmarkov->state[i][0]] += hmarkov->initial[i];  // initial probabilities of the memories summed per state
+          }
+        }
+
+        weight->cumul_computation();
+        weight->max_computation();
+        break;
+      }
+      }
+
+      for (i = 0;i < hmarkov->nb_output_process;i++) {
+        if (hmarkov->categorical_process[i]) {
+          hmarkov->categorical_process[i]->weight = new Distribution(*weight);
+          hmarkov->categorical_process[i]->mixture = hmarkov->categorical_process[i]->mixture_computation(hmarkov->categorical_process[i]->weight);
+        }
+
+        else if (hmarkov->discrete_parametric_process[i]) {
+          hmarkov->discrete_parametric_process[i]->weight = new Distribution(*weight);
+          hmarkov->discrete_parametric_process[i]->mixture = hmarkov->discrete_parametric_process[i]->mixture_computation(hmarkov->discrete_parametric_process[i]->weight);
+        }
+
+        else if (hmarkov->continuous_parametric_process[i]) {
+          hmarkov->continuous_parametric_process[i]->weight = new Distribution(*weight);
+        }
+      }
+
+      delete weight;
+    }
+  }
+
+  return hmarkov;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Estimation of a hidden variable-order Markov chain using the MCEM algorithm.
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] ihmarkov initial hidden variable-order Markov chain, + * \param[in] global_initial_transition type of estimation of the initial transition probabilities (ordinary process case), + * \param[in] common_dispersion flag common dispersion parameter (continuous observation processes), + * \param[in] min_nb_state_sequence minimum number of generated sequences, + * \param[in] max_nb_state_sequence maximum number of generated sequences, + * \param[in] parameter parameter for defining the number of generated sequences, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] state_sequence flag on the computation of the restored state sequences, + * \param[in] nb_iter number of iterations. + * + * \return HiddenVariableOrderMarkov object. + */ +/*--------------------------------------------------------------*/ + +HiddenVariableOrderMarkov* MarkovianSequences::hidden_variable_order_markov_stochastic_estimation(StatError &error , ostream *os , + const HiddenVariableOrderMarkov &ihmarkov , + bool global_initial_transition , + bool common_dispersion , + int min_nb_state_sequence , + int max_nb_state_sequence , + double parameter , bool counting_flag , + bool state_sequence , int nb_iter) const + +{ + bool status; + int i , j , k , m; + int nb_terminal , iter , nb_state_sequence , memory , *state_seq , *pstate , + ***state_sequence_count , nb_element , **pioutput; + double likelihood = D_INF , previous_likelihood , observation_likelihood , **forward , + norm , **predicted , *backward , *cumul_backward , diff , variance , + **mean_direction , concentration , global_mean_direction , **proutput; + Distribution *weight; + ChainReestimation *chain_reestim; + Reestimation ***observation_reestim; + HiddenVariableOrderMarkov *hmarkov; + VariableOrderMarkovData *seq; + +# ifdef DEBUG + double sum; +# 
endif + + + hmarkov = NULL; + error.init(); + + // test number of values for each variable + + status = false; + for (i = 0;i < nb_variable;i++) { + if (max_value[i] > min_value[i]) { + status = true; + break; + } + } + + if (!status) { + error.update(STAT_error[STATR_VARIABLE_NB_VALUE]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (ihmarkov.nb_output_process != nb_variable) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + else { + for (i = 0;i < nb_variable;i++) { + if ((ihmarkov.categorical_process[i]) || (ihmarkov.discrete_parametric_process[i])) { + if (type[i] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (((ihmarkov.categorical_process[i]) && + (ihmarkov.categorical_process[i]->nb_value != marginal_distribution[i]->nb_value)) || + 
((ihmarkov.discrete_parametric_process[i]) && + (ihmarkov.discrete_parametric_process[i]->nb_value < marginal_distribution[i]->nb_value))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + + else if ((ihmarkov.categorical_process[i]) && (!characteristics[i])) { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << j; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + } + } + + if ((min_nb_state_sequence < 1) || (min_nb_state_sequence > max_nb_state_sequence)) { + status = false; + error.update(SEQ_error[SEQR_MIN_NB_STATE_SEQUENCE]); + } + + if ((nb_iter != I_DEFAULT) && (nb_iter < 1)) { + status = false; + error.update(STAT_error[STATR_NB_ITERATION]); + } + + if (status) { + + // construction of the hidden variable-order Markov chain + + hmarkov = new HiddenVariableOrderMarkov(ihmarkov , false); + + if (hmarkov->type == EQUILIBRIUM) { + nb_terminal = (hmarkov->nb_row - 1) * (hmarkov->nb_state - 1) / hmarkov->nb_state + 1; + + for (i = 1;i < hmarkov->nb_row;i++) { + if (!hmarkov->child[i]) { + hmarkov->initial[i] = 1. 
/ (double)nb_terminal; + } + else { + hmarkov->initial[i] = 0.; + } + } + } + + if (common_dispersion) { + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->continuous_parametric_process[i]) { + hmarkov->continuous_parametric_process[i]->tied_dispersion = true; + } + } + } + +# ifdef DEBUG + cout << *hmarkov; +# endif + + // construction of the data structures of the algorithm + + forward = new double*[max_length]; + for (i = 0;i < max_length;i++) { + forward[i] = new double[hmarkov->nb_row]; + } + + predicted = new double*[max_length]; + for (i = 0;i < max_length;i++) { + predicted[i] = new double[hmarkov->nb_row]; + } + + backward = new double[hmarkov->nb_row]; + cumul_backward = new double[hmarkov->nb_row]; + + state_seq = new int[max_length]; + + chain_reestim = new ChainReestimation(hmarkov->type , hmarkov->nb_state , hmarkov->nb_row); + + observation_reestim = new Reestimation**[hmarkov->nb_output_process]; + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (marginal_distribution[i]) { + observation_reestim[i] = new Reestimation*[hmarkov->nb_state]; + for (j = 0;j < hmarkov->nb_state;j++) { + observation_reestim[i][j] = new Reestimation(marginal_distribution[i]->nb_value); + } + } + + else { + observation_reestim[i] = NULL; + } + } + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (!marginal_distribution[i]) { + break; + } + } + + if (i < hmarkov->nb_output_process) { + state_sequence_count = new int**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + state_sequence_count[i] = new int*[length[i]]; + for (j = 0;j < length[i];j++) { + state_sequence_count[i][j] = new int[hmarkov->nb_state]; + } + } + } + else { + state_sequence_count = NULL; + } + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if ((hmarkov->continuous_parametric_process[i]) && + (hmarkov->continuous_parametric_process[i]->ident == VON_MISES)) { + break; + } + } + + if (i < hmarkov->nb_output_process) { + mean_direction = new double*[hmarkov->nb_state]; + for 
(i = 0;i < hmarkov->nb_state;i++) { + mean_direction[i] = new double[4]; + } + } + else { + mean_direction = NULL; + } + + pioutput = new int*[nb_variable]; + proutput = new double*[nb_variable]; + + iter = 0; + do { + previous_likelihood = likelihood; + likelihood = 0.; + + // computation of the number of generated state sequences + + if (min_nb_state_sequence + (int)::round(parameter * iter) < max_nb_state_sequence) { + nb_state_sequence = min_nb_state_sequence + (int)::round(parameter * iter); + } + else { + nb_state_sequence = max_nb_state_sequence; + } + +/* nb_state_sequence = max_nb_state_sequence - (int)round((max_nb_state_sequence - min_nb_state_sequence) * + exp(-parameter * iter)); */ + + iter++; + + // initialization of the reestimation quantities + + chain_reestim->init(); + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (observation_reestim[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + for (k = 0;k < marginal_distribution[i]->nb_value;k++) { + observation_reestim[i][j]->frequency[k] = 0.; + } + } + } + } + + if (state_sequence_count) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < hmarkov->nb_state;k++) { + state_sequence_count[i][j][k] = 0; + } + } + } + } + + for (i = 0;i < nb_sequence;i++) { + + // forward recurrence + + for (j = 0;j < nb_variable;j++) { + switch (type[j]) { + case INT_VALUE : + pioutput[j] = int_sequence[i][j]; + break; + case REAL_VALUE : + proutput[j] = real_sequence[i][j]; + break; + } + } + + norm = 0.; + + switch (hmarkov->type) { + + case ORDINARY : { + for (j = 1;j < hmarkov->nb_row;j++) { + if (hmarkov->order[j] == 1) { + forward[0][j] = hmarkov->initial[hmarkov->state[j][0]]; + + for (k = 0;k < hmarkov->nb_output_process;k++) { + if (hmarkov->categorical_process[k]) { + forward[0][j] *= hmarkov->categorical_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]]; + } + + else if (hmarkov->discrete_parametric_process[k]) { + forward[0][j] *= 
hmarkov->discrete_parametric_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((hmarkov->continuous_parametric_process[k]->ident == GAMMA) || + (hmarkov->continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (min_value[k] < min_interval[k] / 2)) { + switch (type[k]) { + case INT_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + min_interval[k]); + break; + case REAL_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + min_interval[k]); + break; + } + } + + else { + switch (type[k]) { + case INT_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] - min_interval[k] / 2 , *pioutput[k] + min_interval[k] / 2); + break; + case REAL_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] - min_interval[k] / 2 , *proutput[k] + min_interval[k] / 2); + break; + } + } + } + } + + norm += forward[0][j]; + } + + else { + forward[0][j] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < hmarkov->nb_row;j++) { + if (!(hmarkov->child[j])) { + forward[0][j] = hmarkov->initial[j]; + + for (k = 0;k < hmarkov->nb_output_process;k++) { + if (hmarkov->categorical_process[k]) { + forward[0][j] *= hmarkov->categorical_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]]; + } + + else if (hmarkov->discrete_parametric_process[k]) { + forward[0][j] *= hmarkov->discrete_parametric_process[k]->observation[hmarkov->state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((hmarkov->continuous_parametric_process[k]->ident == GAMMA) || + (hmarkov->continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (min_value[k] < min_interval[k] / 2)) { + 
switch (type[k]) { + case INT_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + min_interval[k]); + break; + case REAL_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + min_interval[k]); + break; + } + } + + else { + switch (type[k]) { + case INT_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*pioutput[k] - min_interval[k] / 2 , *pioutput[k] + min_interval[k] / 2); + break; + case REAL_VALUE : + forward[0][j] *= hmarkov->continuous_parametric_process[k]->observation[hmarkov->state[j][0]]->mass_computation(*proutput[k] - min_interval[k] / 2 , *proutput[k] + min_interval[k] / 2); + break; + } + } + } + } + + norm += forward[0][j]; + } + + else { + forward[0][j] = 0.; + } + } + break; + } + } + + if (norm > 0.) { + for (j = 1;j < hmarkov->nb_row;j++) { + forward[0][j] /= norm; + } + + likelihood += log(norm); + } + + else { + likelihood = D_INF; + break; + } + + for (j = 1;j < length[i];j++) { + for (k = 0;k < nb_variable;k++) { + switch (type[k]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + norm = 0.; + + for (k = 1;k < hmarkov->nb_row;k++) { + forward[j][k] = 0.; + for (m = 0;m < hmarkov->nb_memory[k];m++) { + forward[j][k] += hmarkov->transition[hmarkov->previous[k][m]][hmarkov->state[k][0]] * + forward[j - 1][hmarkov->previous[k][m]]; + } + predicted[j][k] = forward[j][k]; + + for (m = 0;m < hmarkov->nb_output_process;m++) { + if (hmarkov->categorical_process[m]) { + forward[j][k] *= hmarkov->categorical_process[m]->observation[hmarkov->state[k][0]]->mass[*pioutput[m]]; + } + + else if (hmarkov->discrete_parametric_process[m]) { + forward[j][k] *= 
hmarkov->discrete_parametric_process[m]->observation[hmarkov->state[k][0]]->mass[*pioutput[m]]; + } + + else { + if (((hmarkov->continuous_parametric_process[m]->ident == GAMMA) || + (hmarkov->continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (min_value[m] < min_interval[m] / 2)) { + switch (type[m]) { + case INT_VALUE : + forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + min_interval[m]); + break; + case REAL_VALUE : + forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + min_interval[m]); + break; + } + } + + else { + switch (type[m]) { + case INT_VALUE : + forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*pioutput[m] - min_interval[m] / 2 , *pioutput[m] + min_interval[m] / 2); + break; + case REAL_VALUE : + forward[j][k] *= hmarkov->continuous_parametric_process[m]->observation[hmarkov->state[k][0]]->mass_computation(*proutput[m] - min_interval[m] / 2 , *proutput[m] + min_interval[m] / 2); + break; + } + } + } + } + + norm += forward[j][k]; + } + + if (norm > 0.) 
{ + for (k = 1;k < hmarkov->nb_row;k++) { + forward[j][k] /= norm; + } + likelihood += log(norm); + } + + else { + likelihood = D_INF; + break; + } + } + + if (likelihood == D_INF) { + break; + } + + // backward passes + + for (j = 0;j < nb_state_sequence;j++) { + k = length[i] - 1; + pstate = state_seq + k; + for (m = 0;m < nb_variable;m++) { + if (type[m] == INT_VALUE) { + pioutput[m] = int_sequence[i][m] + k; + } + } + + cumul_computation(hmarkov->nb_row - 1 , forward[k] + 1 , cumul_backward); + memory = 1 + cumul_method(hmarkov->nb_row - 1 , cumul_backward); + *pstate = hmarkov->state[memory][0]; + + // accumulation of the reestimation quantities of the observation distributions + + for (m = 0;m < hmarkov->nb_output_process;m++) { + if (observation_reestim[m]) { + (observation_reestim[m][*pstate]->frequency[*pioutput[m]])++; + } + } + + if (state_sequence_count) { + (state_sequence_count[i][k][*pstate])++; + } + + for (k = length[i] - 2;k >= 0;k--) { + for (m = 0;m < hmarkov->nb_memory[memory];m++) { + backward[m] = hmarkov->transition[hmarkov->previous[memory][m]][hmarkov->state[memory][0]] * + forward[k][hmarkov->previous[memory][m]] / predicted[k + 1][memory]; + } + +# ifdef DEBUG + sum = 0.; + for (m = 0;m < hmarkov->nb_memory[memory];m++) { + sum += backward[m]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << k << " " << sum << endl; + } +# endif + + cumul_computation(hmarkov->nb_memory[memory] , backward , cumul_backward); + memory = hmarkov->previous[memory][cumul_method(hmarkov->nb_memory[memory] , cumul_backward)]; + *--pstate = hmarkov->state[memory][0]; + + // accumulation of the reestimation quantities of the transition probabilities and + // the observation distributions + + (chain_reestim->transition[memory][*(pstate + 1)])++; + + for (m = 0;m < hmarkov->nb_output_process;m++) { + if (observation_reestim[m]) { + (observation_reestim[m][*pstate]->frequency[*--pioutput[m]])++; + } + } + + if (state_sequence_count) { + (state_sequence_count[i][k][*pstate])++; + } + } + + // accumulation of the reestimation quantities of the initial probabilities + + if (hmarkov->type == ORDINARY) { + (chain_reestim->initial[*pstate])++; + } + } + } + + if (likelihood != D_INF) { + + // reestimation of the initial probabilities + + if (hmarkov->type == ORDINARY) { + reestimation(hmarkov->nb_state , chain_reestim->initial , + hmarkov->initial , MIN_PROBABILITY , false); + } + + // reestimation of the transition probabilities + + for (i = hmarkov->nb_row - 1;i >= 1;i--) { + if (hmarkov->memo_type[i] == COMPLETION) { +/* if ((hmarkov->memo_type[i] == COMPLETION) || ((hmarkov->type == ORDINARY) && + (global_initial_transition) && (hmarkov->order[i] > 1))) { */ + for (j = 0;j < hmarkov->nb_state;j++) { + chain_reestim->transition[hmarkov->parent[i]][j] += chain_reestim->transition[i][j]; + } + } + } + + for (i = 1;i < hmarkov->nb_row;i++) { + if ((hmarkov->memo_type[i] == TERMINAL) || ((hmarkov->type == ORDINARY) && + (hmarkov->memo_type[i] == NON_TERMINAL))) { + reestimation(hmarkov->nb_state , chain_reestim->transition[i] , + hmarkov->transition[i] , MIN_PROBABILITY , false); + } + else if (hmarkov->memo_type[i] == COMPLETION) { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->transition[i][j] = 
hmarkov->transition[hmarkov->parent[i]][j]; + } + } + } + + if (hmarkov->type == EQUILIBRIUM) { + hmarkov->initial_probability_computation(); + } + + // reestimation of the observation distributions + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->categorical_process[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency , + hmarkov->categorical_process[i]->observation[j]->mass , + MIN_PROBABILITY , false); + } + } + + else if (observation_reestim[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + observation_reestim[i][j]->nb_value_computation(); + observation_reestim[i][j]->offset_computation(); + observation_reestim[i][j]->nb_element_computation(); + observation_reestim[i][j]->max_computation(); + if ((hmarkov->discrete_parametric_process[i]) || + (hmarkov->continuous_parametric_process[i]->ident != ZERO_INFLATED_GAMMA)) { + observation_reestim[i][j]->mean_computation(); +// observation_reestim[i][j]->variance_computation(); + observation_reestim[i][j]->variance_computation(true); + } + } + + if (hmarkov->discrete_parametric_process[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + observation_likelihood = observation_reestim[i][j]->type_parametric_estimation(hmarkov->discrete_parametric_process[i]->observation[j] , + 0 , true , OBSERVATION_THRESHOLD); + + if (observation_likelihood != D_INF) { + hmarkov->discrete_parametric_process[i]->observation[j]->computation(marginal_distribution[i]->nb_value , + OBSERVATION_THRESHOLD); + + if (hmarkov->discrete_parametric_process[i]->observation[j]->ident == BINOMIAL) { + for (k = hmarkov->discrete_parametric_process[i]->observation[j]->nb_value;k < marginal_distribution[i]->nb_value;k++) { + hmarkov->discrete_parametric_process[i]->observation[j]->mass[k] = 0.; + } + } + } + } + } + + else { + switch (hmarkov->continuous_parametric_process[i]->ident) { + + case GAMMA : { + for (j = 0;j < hmarkov->nb_state;j++) { + 
observation_reestim[i][j]->gamma_estimation(hmarkov->continuous_parametric_process[i]->observation[j] , iter); + } + break; + } + + case ZERO_INFLATED_GAMMA : { + for (j = 0;j < hmarkov->nb_state;j++) { + observation_reestim[i][j]->zero_inflated_gamma_estimation(hmarkov->continuous_parametric_process[i]->observation[j] , iter); + } + break; + } + + case GAUSSIAN : { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->continuous_parametric_process[i]->observation[j]->location = observation_reestim[i][j]->mean; + } + + if (common_dispersion) { + variance = 0.; + nb_element = 0; + + for (j = 0;j < hmarkov->nb_state;j++) { + for (k = observation_reestim[i][j]->offset;k < observation_reestim[i][j]->nb_value;k++) { + diff = k - observation_reestim[i][j]->mean; + variance += observation_reestim[i][j]->frequency[k] * diff * diff; + } + + nb_element += observation_reestim[i][j]->nb_element; + } + + variance /= nb_element; +// variance /= (nb_element - 1); + + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(variance); + } + } + + else { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = sqrt(observation_reestim[i][j]->variance); + if (hmarkov->continuous_parametric_process[i]->observation[j]->dispersion / + hmarkov->continuous_parametric_process[i]->observation[j]->location < GAUSSIAN_MIN_VARIATION_COEFF) { + hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = hmarkov->continuous_parametric_process[i]->observation[j]->location * GAUSSIAN_MIN_VARIATION_COEFF; + } + } + } + break; + } + + case VON_MISES : { + for (j = 0;j < hmarkov->nb_state;j++) { + observation_reestim[i][j]->mean_direction_computation(mean_direction[j]); + hmarkov->continuous_parametric_process[i]->observation[j]->location = mean_direction[j][3]; + } + + if (common_dispersion) { + global_mean_direction = 0.; + nb_element = 0; + + for (j = 0;j < 
hmarkov->nb_state;j++) { + global_mean_direction += observation_reestim[i][j]->nb_element * mean_direction[j][2]; + nb_element += observation_reestim[i][j]->nb_element; + } + concentration = von_mises_concentration_computation(global_mean_direction / nb_element); + + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = concentration; + } + } + + else { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->continuous_parametric_process[i]->observation[j]->dispersion = von_mises_concentration_computation(mean_direction[j][2]); + } + } + break; + } + } + } + } + + else { + switch (hmarkov->continuous_parametric_process[i]->ident) { + case GAMMA : + gamma_estimation(state_sequence_count , i , + hmarkov->continuous_parametric_process[i] , iter); + break; + case ZERO_INFLATED_GAMMA : + zero_inflated_gamma_estimation(state_sequence_count , i , + hmarkov->continuous_parametric_process[i] , iter); + break; + case GAUSSIAN : + gaussian_estimation(state_sequence_count , i , + hmarkov->continuous_parametric_process[i]); + break; + case VON_MISES : + von_mises_estimation(state_sequence_count , i , + hmarkov->continuous_parametric_process[i]); + break; + } + } + } + } + + if (os) { + *os << STAT_label[STATL_ITERATION] << " " << iter << " " + << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << likelihood + << " (" << nb_state_sequence << ")" << endl; + } + +# ifdef DEBUG + if (iter % 5 == 0) { + cout << *hmarkov; + } +# endif + + } + while ((likelihood != D_INF) && (((nb_iter == I_DEFAULT) && (iter < VARIABLE_ORDER_MARKOV_NB_ITER) && + ((likelihood - previous_likelihood) / -likelihood > VARIABLE_ORDER_MARKOV_LIKELIHOOD_DIFF)) || + ((nb_iter != I_DEFAULT) && (iter < nb_iter)))); + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << iter << " " << STAT_label[STATL_ITERATIONS] << endl; + } + + // reestimation of the initial probabilities + + if (hmarkov->type == ORDINARY) { + reestimation(hmarkov->nb_state , 
chain_reestim->initial , + hmarkov->initial , MIN_PROBABILITY , true); + } + + // reestimation of the transition probabilities + + if ((hmarkov->type == ORDINARY) && (global_initial_transition)) { + for (i = hmarkov->nb_row - 1;i >= 1;i--) { + if ((hmarkov->memo_type[i] != COMPLETION) && (hmarkov->order[i] > 1)) { + for (j = 0;j < hmarkov->nb_state;j++) { + chain_reestim->transition[hmarkov->parent[i]][j] += chain_reestim->transition[i][j]; + } + } + } + } + + for (i = 1;i < hmarkov->nb_row;i++) { + if ((hmarkov->memo_type[i] == TERMINAL) || ((hmarkov->type == ORDINARY) && + (hmarkov->memo_type[i] == NON_TERMINAL))) { + reestimation(hmarkov->nb_state , chain_reestim->transition[i] , + hmarkov->transition[i] , MIN_PROBABILITY , true); + } + else if (hmarkov->memo_type[i] == COMPLETION) { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->transition[i][j] = hmarkov->transition[hmarkov->parent[i]][j]; + } + } + } + + if (hmarkov->type == EQUILIBRIUM) { + hmarkov->initial_probability_computation(); + } + + // reestimation of the categorical observation distributions + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->categorical_process[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + reestimation(marginal_distribution[i]->nb_value , observation_reestim[i][j]->frequency , + hmarkov->categorical_process[i]->observation[j]->mass , + MIN_PROBABILITY , true); + } + } + + else if (hmarkov->discrete_parametric_process[i]) { + hmarkov->discrete_parametric_process[i]->nb_value_computation(); + } + } + } + + // destruction of the data structures of the algorithm + + for (i = 0;i < max_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 0;i < max_length;i++) { + delete [] predicted[i]; + } + delete [] predicted; + + delete [] backward; + delete [] cumul_backward; + + delete [] state_seq; + + delete chain_reestim; + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (observation_reestim[i]) { + for (j = 0;j < hmarkov->nb_state;j++) 
{ + delete observation_reestim[i][j]; + } + delete [] observation_reestim[i]; + } + } + delete [] observation_reestim; + + if (state_sequence_count) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + delete [] state_sequence_count[i][j]; + } + delete [] state_sequence_count[i]; + } + delete [] state_sequence_count; + } + + if (mean_direction) { + for (i = 0;i < hmarkov->nb_state;i++) { + delete [] mean_direction[i]; + } + delete [] mean_direction; + } + + delete [] pioutput; + delete [] proutput; + + if (likelihood == D_INF) { + delete hmarkov; + hmarkov = NULL; + error.update(STAT_error[STATR_ESTIMATION_FAILURE]); + } + + else { + if (state_sequence) { + hmarkov->markov_data = new VariableOrderMarkovData(*this , ADD_STATE_VARIABLE , + (hmarkov->type == EQUILIBRIUM ? true : false)); + seq = hmarkov->markov_data; + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (((hmarkov->discrete_parametric_process[i]) || (hmarkov->continuous_parametric_process[i])) && + (seq->characteristics[i + 1])) { + delete seq->characteristics[i + 1]; + seq->characteristics[i + 1] = NULL; + } + } + + hmarkov->forward_backward(*seq); + + hmarkov->create_cumul(); + hmarkov->log_computation(); + hmarkov->viterbi(*seq); + hmarkov->remove_cumul(); + + seq->min_value_computation(0); + seq->max_value_computation(0); + seq->build_marginal_frequency_distribution(0); + seq->build_characteristic(0); + + seq->build_transition_count(*hmarkov); + seq->build_observation_frequency_distribution(hmarkov->nb_state); + seq->build_observation_histogram(hmarkov->nb_state); + + // computation of the mixtures of observation distributions (weights deduced from the restoration) + + weight = seq->weight_computation(); + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->categorical_process[i]) { + hmarkov->categorical_process[i]->restoration_weight = new Distribution(*weight); + hmarkov->categorical_process[i]->restoration_mixture = 
hmarkov->categorical_process[i]->mixture_computation(hmarkov->categorical_process[i]->restoration_weight); + } + + else if (hmarkov->discrete_parametric_process[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->discrete_parametric_process[i]->observation[j]->cumul_computation(); + } + + hmarkov->discrete_parametric_process[i]->restoration_weight = new Distribution(*weight); + hmarkov->discrete_parametric_process[i]->restoration_mixture = hmarkov->discrete_parametric_process[i]->mixture_computation(hmarkov->discrete_parametric_process[i]->restoration_weight); + } + + else if (hmarkov->continuous_parametric_process[i]) { + hmarkov->continuous_parametric_process[i]->restoration_weight = new Distribution(*weight); + } + } + + delete weight; + + if (os) { + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << ": " << seq->restoration_likelihood; + + for (i = 0;i < nb_variable;i++) { + if (type[i] == REAL_VALUE) { + break; + } + } + if (i == nb_variable) { + *os << " | " << hmarkov->VariableOrderMarkov::likelihood_computation(*seq); + } + *os << endl; + } + } + + else { + hmarkov->markov_data = new VariableOrderMarkovData(*this , SEQUENCE_COPY , + (hmarkov->type == EQUILIBRIUM ? 
true : false)); + seq = hmarkov->markov_data; + if (seq->type[0] == STATE) { + seq->state_variable_init(INT_VALUE); + } + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (((hmarkov->discrete_parametric_process[i]) || (hmarkov->continuous_parametric_process[i])) && + (seq->characteristics[i])) { + delete seq->characteristics[i]; + seq->characteristics[i] = NULL; + } + } + } + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->categorical_process[i]) { + for (j = 0;j < hmarkov->nb_state;j++) { + hmarkov->categorical_process[i]->observation[j]->cumul_computation(); + + hmarkov->categorical_process[i]->observation[j]->max_computation(); +// hmarkov->categorical_process[i]->observation[j]->mean_computation(); +// hmarkov->categorical_process[i]->observation[j]->variance_computation(); + } + } + } + + // computation of the log-likelihood and the characteristic distributions of the model + + seq->likelihood = hmarkov->likelihood_computation(*this , seq->posterior_probability); + +# ifdef DEBUG +// cout << *hmarkov; + cout << "iteration " << iter << " " + << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << seq->likelihood << endl; +# endif + + if ((os) && (state_sequence) && (seq->nb_sequence <= POSTERIOR_PROBABILITY_NB_SEQUENCE)) { + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] << endl; + for (i = 0;i < seq->nb_sequence;i++) { + *os << SEQ_label[SEQL_SEQUENCE] << " " << seq->identifier[i] << ": " + << seq->posterior_probability[i] << endl; + } + } + + hmarkov->component_computation(); + hmarkov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false); + + // computation of the mixtures of observation distributions (theoretical weights) + + switch (hmarkov->type) { + + case ORDINARY : { + weight = hmarkov->state_process->weight_computation(); + break; + } + + case EQUILIBRIUM : { + weight = new Distribution(hmarkov->nb_state); + + for (i = 0;i < hmarkov->nb_state;i++) { + weight->mass[i] = 0.; + } + for (i 
= 1;i < hmarkov->nb_row;i++) { + if ((hmarkov->memo_type[i] == TERMINAL) || (hmarkov->memo_type[i] == COMPLETION)) { + weight->mass[hmarkov->state[i][0]] += hmarkov->initial[i]; + } + } + + weight->cumul_computation(); + weight->max_computation(); + break; + } + } + + for (i = 0;i < hmarkov->nb_output_process;i++) { + if (hmarkov->categorical_process[i]) { + hmarkov->categorical_process[i]->weight = new Distribution(*weight); + hmarkov->categorical_process[i]->mixture = hmarkov->categorical_process[i]->mixture_computation(hmarkov->categorical_process[i]->weight); + } + + else if (hmarkov->discrete_parametric_process[i]) { + hmarkov->discrete_parametric_process[i]->weight = new Distribution(*weight); + hmarkov->discrete_parametric_process[i]->mixture = hmarkov->discrete_parametric_process[i]->mixture_computation(hmarkov->discrete_parametric_process[i]->weight); + } + + else if (hmarkov->continuous_parametric_process[i]) { + hmarkov->continuous_parametric_process[i]->weight = new Distribution(*weight); + } + } + + delete weight; + } + } + + return hmarkov; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/hvomc_algorithms2.cpp b/src/cpp/sequence_analysis/hvomc_algorithms2.cpp new file mode 100644 index 0000000..dd7cf1d --- /dev/null +++ b/src/cpp/sequence_analysis/hvomc_algorithms2.cpp @@ -0,0 +1,5997 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software 
Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "hidden_variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state sequence entropies using the forward-backward algorithm. + * + * \param[in] seq reference to a VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +void HiddenVariableOrderMarkov::forward_backward(VariableOrderMarkovData &seq) const + +{ + int i , j , k , m; + int **pioutput; + double seq_likelihood , observation , **forward , norm , **predicted , buff , + *transition_predicted , **forward_state_entropy , **proutput; + +# ifdef MESSAGE + double entropy , **backward , *auxiliary , **transition_entropy; +# endif + + + // initializations + + seq.entropy = new double[seq.nb_sequence]; + seq.nb_state_sequence = new double[seq.nb_sequence]; + + forward = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + forward[i] = new double[nb_row]; + } + + predicted = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + predicted[i] = new double[nb_row]; + } + + transition_predicted = new double[nb_row]; + + forward_state_entropy = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + forward_state_entropy[i] = new double[nb_row]; + } + +# ifdef MESSAGE + backward = new double*[seq.max_length]; + for (i = 0;i < seq.max_length;i++) { + backward[i] = new double[nb_row]; + } + + auxiliary = new double[nb_row]; + + transition_entropy = new double*[nb_row]; + for (i = 1;i < nb_row;i++) { + transition_entropy[i] = new double[nb_state]; + } +# endif + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + seq.sample_entropy = 0.; + + for (i = 0;i < seq.nb_sequence;i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + + // forward recurrence + + seq_likelihood = 0.; + norm = 0.; + + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + forward[0][j] = initial[state[j][0]]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + 
forward[0][j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[0][j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + norm += forward[0][j]; + } + + else { + forward[0][j] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + forward[0][j] = initial[j]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[0][j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[0][j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < 
seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + norm += forward[0][j]; + } + + else { + forward[0][j] = 0.; + } + } + break; + } + } + + if (norm > 0.) { + for (j = 1;j < nb_row;j++) { + forward[0][j] /= norm; + } + + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + } + + if (seq_likelihood != D_INF) { + for (j = 1;j < nb_row;j++) { + forward_state_entropy[0][j] = 0.; + } + + for (j = 1;j < seq.length[i];j++) { + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + norm = 0.; + + for (k = 1;k < nb_row;k++) { + forward[j][k] = 0.; + for (m = 0;m < nb_memory[k];m++) { + transition_predicted[m] = transition[previous[k][m]][state[k][0]] * forward[j - 1][previous[k][m]]; + forward[j][k] += transition_predicted[m]; + +// forward[j][k] += transition[previous[k][m]][state[k][0]] * forward[j - 1][previous[k][m]]; + } + predicted[j][k] = forward[j][k]; + + forward_state_entropy[j][k] = 0.; + if (predicted[j][k] > 0.) 
{ + for (m = 0;m < nb_memory[k];m++) { + if (transition_predicted[m] > 0.) { + buff = transition_predicted[m] / predicted[j][k]; + forward_state_entropy[j][k] += buff * (forward_state_entropy[j - 1][previous[k][m]] - log(buff)); + } + } + + if (forward_state_entropy[j][k] < 0.) { + forward_state_entropy[j][k] = 0.; + } + } + + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + forward[j][k] *= categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + forward[j][k] *= discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + forward[j][k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + forward[j][k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + forward[j][k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + forward[j][k] *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + } + } + + norm += forward[j][k]; + } + + if (norm > 0.) 
{ + for (k = 1;k < nb_row;k++) { + forward[j][k] /= norm; + } + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + break; + } + } + + seq.entropy[i] = 0.; + j = seq.length[i] - 1; + for (k = 1;k < nb_row;k++) { + if (forward[j][k] > 0.) { + seq.entropy[i] += forward[j][k] * (forward_state_entropy[j][k] - log(forward[j][k])); + } + } + seq.sample_entropy += seq.entropy[i]; + +# ifdef DEBUG + cout << "\n"; + for (j = 0;j < seq.length[i];j++) { + cout << j << " |"; + for (k = 1;k < nb_row;k++) { + cout << " " << forward_state_entropy[j][k]; + } + cout << endl; + } +# endif + + } + + // backward recurrence + + if (seq_likelihood != D_INF) { + +# ifdef MESSAGE + entropy = 0.; + + for (j = 1;j < nb_row;j++) { + for (k = 0;k < nb_state;k++) { + transition_entropy[j][k] = 0.; + } + } + + j = seq.length[i] - 1; + for (k = 1;k < nb_row;k++) { + backward[j][k] = forward[j][k]; + + if (backward[j][k] > 0.) { + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + if (categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]]); + } + } + + else if (discrete_parametric_process[m]) { + if (discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]] > 0.) 
{ + entropy -= backward[j][k] * log(discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]]); + } + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1])); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1])); + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2)); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2)); + break; + } + } + } + } + } + } + + for (j = seq.length[i] - 2;j >= 0;j--) { + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]--; + break; + case REAL_VALUE : + proutput[k]--; + break; + } + } + + for (k = 1;k < nb_row;k++) { + if (predicted[j + 1][k] > 0.) { + auxiliary[k] = backward[j + 1][k] / predicted[j + 1][k]; + } + else { + auxiliary[k] = 0.; + } + } + + for (k = 1;k < nb_row;k++) { + backward[j][k] = 0.; + + if (next[k]) { + for (m = 0;m < nb_state;m++) { + buff = auxiliary[next[k][m]] * transition[k][m] * forward[j][k]; + backward[j][k] += buff; + transition_entropy[k][m] += buff; + } + + if (backward[j][k] > 0.) 
{ + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + if (categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]]); + } + } + + else if (discrete_parametric_process[m]) { + if (discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]] > 0.) { + entropy -= backward[j][k] * log(discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]]); + } + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1])); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1])); + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2)); + break; + case REAL_VALUE : + entropy -= backward[j][k] * log(continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2)); + break; + } + } + } + } + } + } + } + } + + for (j = 1;j < nb_row;j++) { + switch (type) { + + case ORDINARY : { + if ((order[j] == 1) && (initial[state[j][0]] > 0.)) { + entropy -= backward[0][j] * log(initial[state[j][0]]); + } + break; + } + + case EQUILIBRIUM : { + if ((!child[j]) && (initial[j] > 0.)) { + entropy -= backward[0][j] * 
log(initial[j]); + } + break; + } + } + } + + for (j = 1;j < nb_row;j++) { + for (k = 0;k < nb_state;k++) { + if (transition[j][k] > 0.) { + entropy -= transition_entropy[j][k] * log(transition[j][k]); + } + } + } + + entropy += seq_likelihood; + + if ((entropy < seq.entropy[i] - DOUBLE_ERROR) || (entropy > seq.entropy[i] + DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " " << seq.entropy[i] << " " << entropy << endl; + } +# endif + + // computation of the number of state sequences + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + + // forward recurrence + + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + forward[0][j] = initial[state[j][0]]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[0][j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[0][j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , 
*pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + if (forward[0][j] > 0.) { + forward[0][j] = 1.; + } + } + + else { + forward[0][j] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + forward[0][j] = initial[j]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[0][j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[0][j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + forward[0][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + if (forward[0][j] > 0.) 
{ + forward[0][j] = 1.; + } + } + + else { + forward[0][j] = 0.; + } + } + break; + } + } + + for (j = 1;j < seq.length[i];j++) { + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + + for (k = 1;k < nb_row;k++) { + forward[j][k] = 0.; + for (m = 0;m < nb_memory[k];m++) { + if (transition[previous[k][m]][state[k][0]] > 0.) { + forward[j][k] += forward[j - 1][previous[k][m]]; + } + } + + observation = 1.; + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + observation *= categorical_process[m]->observation[state[k][0]]->mass[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + observation *= discrete_parametric_process[m]->observation[state[k][0]]->mass[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + observation *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + observation *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + observation *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + observation *= continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + } + } + + if (observation == 0.) 
{ + forward[j][k] = 0.; + } + } + } + + seq.nb_state_sequence[i] = 0.; + j = seq.length[i] - 1; + for (k = 1;k < nb_row;k++) { + seq.nb_state_sequence[i] += forward[j][k]; + } + } + } + + for (i = 0;i < seq.max_length;i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 0;i < seq.max_length;i++) { + delete [] predicted[i]; + } + delete [] predicted; + + delete [] transition_predicted; + + for (i = 0;i < seq.max_length;i++) { + delete [] forward_state_entropy[i]; + } + delete [] forward_state_entropy; + +# ifdef MESSAGE + for (i = 0;i < seq.max_length;i++) { + delete [] backward[i]; + } + delete [] backward; + + delete [] auxiliary; + + for (i = 1;i < nb_row;i++) { + delete [] transition_entropy[i]; + } + delete [] transition_entropy; +# endif + + delete [] pioutput; + delete [] proutput; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of state and entropy profiles. + * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] nb_state number of states, + * \param[in] profiles pointer on the state profiles, + * \param[in] begin_conditional_entropy pointer on the profiles of entropies conditional on the past, + * \param[in] marginal_entropy pointer on the marginal entropy profiles, + * \param[in] begin_partial_entropy pointer on the profiles of partial entropies conditional on the past, + * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future, + * \param[in] end_partial_entropy pointer on the profiles of partial entropies conditional on the future. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::profile_ascii_print(ostream &os , int index , int nb_state , + double **profiles , double *begin_conditional_entropy , + double *marginal_entropy , double *begin_partial_entropy , + double *end_conditional_entropy , double *end_partial_entropy) const + +{ + int i , j; + int buff , *width; + ios_base::fmtflags format_flags; + + + format_flags = os.flags(ios::adjustfield); + + // computation of the column widths + + width = new int[nb_variable + 8]; + + for (i = 0;i < nb_variable;i++) { + if (type[i] != REAL_VALUE) { + width[i] = column_width((int)max_value[i]); + } + else { + width[i] = column_width(length[index] , real_sequence[index][i]); + } + + if (i > 0) { + width[i] += ASCII_SPACE; + } + } + + if (index_parameter) { + width[nb_variable] = column_width(index_parameter_distribution->nb_value - 1) + ASCII_SPACE; + } + else { + width[nb_variable] = column_width(max_length) + ASCII_SPACE; + } + + width[nb_variable + 1] = 0; + for (i = 0;i < length[index];i++) { + buff = column_width(nb_state , profiles[i]); + if (buff > width[nb_variable + 1]) { + width[nb_variable + 1] = buff; + } + } + width[nb_variable + 1] += ASCII_SPACE; + + width[nb_variable + 2] = column_width(length[index] , begin_conditional_entropy) + ASCII_SPACE; + width[nb_variable + 3] = column_width(length[index] , marginal_entropy) + ASCII_SPACE; + width[nb_variable + 4] = column_width(length[index] , begin_partial_entropy) + ASCII_SPACE; + + width[nb_variable + 5] = column_width(nb_sequence); + + if ((end_conditional_entropy) && (end_partial_entropy)) { + width[nb_variable + 6] = column_width(length[index] , end_conditional_entropy) + ASCII_SPACE; + width[nb_variable + 7] = column_width(length[index] , end_partial_entropy) + ASCII_SPACE; + } + + os << SEQ_label[SEQL_OPTIMAL] << " " << STAT_label[STATL_STATE]; + for (i = 1;i < nb_variable;i++) { + os << " | " << STAT_label[STATL_VARIABLE] << " " << i; + } 
+ if (index_param_type == TIME) { + os << " | " << SEQ_label[SEQL_TIME]; + } + else { + os << " | " << SEQ_label[SEQL_INDEX]; + } + for (i = 0;i < nb_state;i++) { + os << " | " << STAT_label[STATL_STATE] << " " << i; + } + os << " | " << SEQ_label[SEQL_CONDITIONAL_ENTROPY] << " | " << SEQ_label[SEQL_MARGINAL_ENTROPY] + << " | " << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] << endl; + + for (i = 0;i < length[index];i++) { + os.setf(ios::right , ios::adjustfield); + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + os << setw(width[j]) << int_sequence[index][j][i]; + } + else { + os << setw(width[j]) << real_sequence[index][j][i]; + } + } + os << setw(width[nb_variable]) << (index_parameter ? index_parameter[index][i] : i) << " "; + + os.setf(ios::left , ios::adjustfield); + for (j = 0;j < nb_state;j++) { + os << setw(width[nb_variable + 1]) << profiles[i][j]; + } + + if ((end_conditional_entropy) && (end_partial_entropy)) { + os << setw(width[nb_variable + 2]) << begin_conditional_entropy[i]; + os << setw(width[nb_variable + 6]) << end_conditional_entropy[i]; + os << setw(width[nb_variable + 3]) << marginal_entropy[i]; + os << setw(width[nb_variable + 4]) << begin_partial_entropy[i]; + os << setw(width[nb_variable + 7]) << end_partial_entropy[i]; + } + else { + os << setw(width[nb_variable + 2]) << begin_conditional_entropy[i]; + os << setw(width[nb_variable + 3]) << marginal_entropy[i]; + os << setw(width[nb_variable + 4]) << begin_partial_entropy[i]; + } + + if (i == 0) { + os.setf(ios::right , ios::adjustfield); + os << setw(width[nb_variable + 5]) << identifier[index]; + } + os << endl; + } + + delete [] width; + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of state and entropy profiles at the spreadsheet format. 
+ * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] nb_state number of states, + * \param[in] profiles pointer on the state profiles, + * \param[in] begin_conditional_entropy pointer on the profiles of entropies conditional on the past, + * \param[in] marginal_entropy pointer on the marginal entropy profiles, + * \param[in] begin_partial_entropy pointer on the profiles of partial entropies conditional on the past, + * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future, + * \param[in] end_partial_entropy pointer on the profiles of partial entropies conditional on the future. + */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::profile_spreadsheet_print(ostream &os , int index , int nb_state , + double **profiles , double *begin_conditional_entropy , + double *marginal_entropy , double *begin_partial_entropy , + double *end_conditional_entropy , double *end_partial_entropy) const + +{ + int i , j; + + + os << SEQ_label[SEQL_OPTIMAL] << " " << STAT_label[STATL_STATE]; + for (i = 1;i < nb_variable;i++) { + os << "\t" << STAT_label[STATL_VARIABLE] << " " << i; + } + if (index_param_type == TIME) { + os << "\t" << SEQ_label[SEQL_TIME]; + } + else { + os << "\t" << SEQ_label[SEQL_INDEX]; + } + for (i = 0;i < nb_state;i++) { + os << "\t" << STAT_label[STATL_STATE] << " " << i; + } + os << "\t" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] << "\t" << SEQ_label[SEQL_MARGINAL_ENTROPY] + << "\t" << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] << endl; + + for (i = 0;i < length[index];i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + os << int_sequence[index][j][i] << "\t"; + } + else { + os << real_sequence[index][j][i] << "\t"; + } + } + os << (index_parameter ? 
index_parameter[index][i] : i); + + for (j = 0;j < nb_state;j++) { + os << "\t" << profiles[i][j]; + } + + if ((end_conditional_entropy) && (end_partial_entropy)) { + os << "\t" << begin_conditional_entropy[i] << "\t" << end_conditional_entropy[i] << "\t" << marginal_entropy[i] + << "\t" << begin_partial_entropy[i] << "\t" << end_partial_entropy[i]; + } + else { + os << "\t" << begin_conditional_entropy[i] << "\t" << marginal_entropy[i] + << "\t" << begin_partial_entropy[i]; + } + + if (i == 0) { + os << "\t" << identifier[index]; + } + os << endl; + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of state and entropy profiles at the Gnuplot format. + * + * \param[in,out] os stream, + * \param[in] index sequence index, + * \param[in] nb_state number of states, + * \param[in] profiles pointer on the state profiles, + * \param[in] begin_conditional_entropy pointer on the profiles of entropies conditional on the past, + * \param[in] marginal_entropy pointer on the marginal entropy profiles, + * \param[in] begin_partial_entropy pointer on the profiles of partial entropies conditional on the past, + * \param[in] end_conditional_entropy pointer on the profiles of entropies conditional on the future, + * \param[in] end_partial_entropy pointer on the profiles of partial entropies conditional on the future. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::profile_plot_print(ostream &os , int index , int nb_state , + double **profiles , double *begin_conditional_entropy , + double *marginal_entropy , double *begin_partial_entropy , + double *end_conditional_entropy , double *end_partial_entropy) const + +{ + int i , j; + + + for (i = 0;i < length[index];i++) { + if (index_parameter) { + os << index_parameter[index][i] << " "; + } + + for (j = 0;j < nb_state;j++) { + os << profiles[i][j] << " "; + } + + if ((end_conditional_entropy) && (end_partial_entropy)) { + os << begin_conditional_entropy[i] << " " << end_conditional_entropy[i] << " " + << marginal_entropy[i] << " " << begin_partial_entropy[i] << " " + << end_partial_entropy[i] << endl; + } + else { + os << begin_conditional_entropy[i] << " " << marginal_entropy[i] << " " + << begin_partial_entropy[i] << endl; + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of state profiles for plots. + * + * \param[in] plot reference on a MultiPlot object, + * \param[in] index sequence index, + * \param[in] nb_state number of states, + * \param[in] profiles pointer on the state profiles. + */ +/*--------------------------------------------------------------*/ + +void Sequences::profile_plotable_write(MultiPlot &plot , int index , int nb_state , + double **profiles) const + +{ + int i , j; + + + plot.resize(nb_state); + + if (index_parameter) { + for (i = 0;i < length[index];i++) { + for (j = 0;j < nb_state;j++) { + plot[j].add_point(index_parameter[index][i] , profiles[i][j]); + } + } + } + + else { + for (i = 0;i < length[index];i++) { + for (j = 0;j < nb_state;j++) { + plot[j].add_point(i , profiles[i][j]); + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of entropy profiles for plots. 
+ * + * \param[in] plot reference on a MultiPlot object, + * \param[in] index sequence index, + * \param[in] begin_entropy pointer on the profiles of (partial) entropies conditional on the past, + * \param[in] end_entropy pointer on the profiles of (partial) entropies conditional on the future, + * \param[in] marginal_entropy pointer on the marginal entropy profiles. + */ +/*--------------------------------------------------------------*/ + +void Sequences::entropy_profile_plotable_write(MultiPlot &plot , int index , + double *begin_entropy , double *end_entropy , + double *marginal_entropy) const + +{ + int i , j; + int nb_plot; + + + nb_plot = 1; + if (end_entropy) { + nb_plot++; + } + if (marginal_entropy) { + nb_plot++; + } + plot.resize(nb_plot); + + if (index_parameter) { + for (i = 0;i < length[index];i++) { + plot[0].add_point(index_parameter[index][i] , begin_entropy[i]); + } + + i = 1; + if (end_entropy) { + for (j = 0;j < length[index];j++) { + plot[i].add_point(index_parameter[index][j] , end_entropy[j]); + } + i++; + } + + if (marginal_entropy) { + for (j = 0;j < length[index];j++) { + plot[i].add_point(index_parameter[index][j] , marginal_entropy[j]); + } + } + } + + else { + for (i = 0;i < length[index];i++) { + plot[0].add_point(i , begin_entropy[i]); + } + + i = 1; + if (end_entropy) { + for (j = 0;j < length[index];j++) { + plot[i].add_point(j , end_entropy[j]); + } + i++; + } + + if (marginal_entropy) { + for (j = 0;j < length[index];j++) { + plot[i].add_point(j , marginal_entropy[j]); + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm. 
+ * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] plot_set pointer on a MultiPlotSet object, + * \param[in] format output format (ASCII/SPREADSHEET/GNUPLOT/PLOT), + * \param[in] max_marginal_entropy reference on the maximum marginal entropy, + * \param[in] entropy1 reference on the entropy (for the plots). + * + * \return log-likelihood for the observed sequence. + */ +/*--------------------------------------------------------------*/ + +double HiddenVariableOrderMarkov::forward_backward(MarkovianSequences &seq , int index , + ostream *os , MultiPlotSet *plot_set , + output_format format , double &max_marginal_entropy , + double &entropy1) const + +{ + int i , j , k; + int *pstate , **pioutput; + double seq_likelihood , state_seq_likelihood , **forward , norm , **predicted , + entropy2 , buff , **backward , *auxiliary , backward_max , **state_backward , + *transition_predicted , **forward_state_entropy , **transition_entropy , + *begin_partial_entropy , *begin_conditional_entropy , *end_backward , + *end_partial_entropy , *end_conditional_entropy , *marginal_entropy , **proutput; +// double **backward_state_entropy; + + + // initializations + + forward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward[i] = new double[nb_row]; + } + + predicted = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + predicted[i] = new double[nb_row]; + } + + backward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward[i] = new double[nb_row]; + } + + auxiliary = new double[nb_row]; + + state_backward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_backward[i] = new double[nb_state]; + } + + transition_predicted = new double[nb_row]; + + forward_state_entropy = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward_state_entropy[i] = new 
double[nb_row]; + } + + transition_entropy = new double*[nb_row]; + for (i = 1;i < nb_row;i++) { + transition_entropy[i] = new double[nb_state]; + } + + begin_partial_entropy = new double[seq.length[index]]; + begin_conditional_entropy = new double[seq.length[index]]; + +/* backward_state_entropy = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward_state_entropy[i] = new double[nb_row]; + } */ + + end_backward = new double[nb_row]; + end_partial_entropy = new double[seq.length[index]]; + end_conditional_entropy = new double[seq.length[index]]; + + marginal_entropy = new double[seq.length[index]]; + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + + // forward recurrence + + seq_likelihood = 0.; + norm = 0.; + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + forward[0][i] = initial[state[i][0]]; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + forward[0][i] *= categorical_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + forward[0][i] *= discrete_parametric_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + forward[0][i] *= 
continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + } + } + + norm += forward[0][i]; + } + + else { + forward[0][i] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + forward[0][i] = initial[i]; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + forward[0][i] *= categorical_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + forward[0][i] *= discrete_parametric_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case 
REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + } + } + + norm += forward[0][i]; + } + + else { + forward[0][i] = 0.; + } + } + break; + } + } + + if (norm > 0.) { + for (i = 1;i < nb_row;i++) { + forward[0][i] /= norm; + } + + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + } + + if (seq_likelihood != D_INF) { + for (i = 1;i < nb_row;i++) { + forward_state_entropy[0][i] = 0.; + } + + for (i = 1;i < seq.length[index];i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + norm = 0.; + + for (j = 1;j < nb_row;j++) { + forward[i][j] = 0.; + for (k = 0;k < nb_memory[j];k++) { + transition_predicted[k] = transition[previous[j][k]][state[j][0]] * forward[i - 1][previous[j][k]]; + forward[i][j] += transition_predicted[k]; + +// forward[i][j] += transition[previous[j][k]][state[j][0]] * forward[i - 1][previous[j][k]]; + } + predicted[i][j] = forward[i][j]; + + forward_state_entropy[i][j] = 0.; + if (predicted[i][j] > 0.) { + for (k = 0;k < nb_memory[j];k++) { + if (transition_predicted[k] > 0.) { + buff = transition_predicted[k] / predicted[i][j]; + forward_state_entropy[i][j] += buff * (forward_state_entropy[i - 1][previous[j][k]] - log(buff)); + } + } + + if (forward_state_entropy[i][j] < 0.) 
{ + forward_state_entropy[i][j] = 0.; + } + } + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[i][j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[i][j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + norm += forward[i][j]; + } + + if (norm > 0.) { + for (j = 1;j < nb_row;j++) { + forward[i][j] /= norm; + } + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + break; + } + } + + entropy1 = 0.; + i = seq.length[index] - 1; + for (j = 1;j < nb_row;j++) { + if (forward[i][j] > 0.) 
{ + entropy1 += forward[i][j] * (forward_state_entropy[i][j] - log(forward[i][j])); + } + } + +# ifdef DEBUG + cout << "\n"; + for (i = 0;i < seq.length[index];i++) { + cout << i << " |"; + for (j = 1;j < nb_row;j++) { + cout << " " << forward_state_entropy[i][j]; + } + cout << endl; + } +# endif + + } + + // backward recurrence + + if (seq_likelihood != D_INF) { + entropy2 = 0.; + + for (i = 1;i < nb_row;i++) { + for (j = 0;j < nb_state;j++) { + transition_entropy[i][j] = 0.; + } + } + + i = seq.length[index] - 1; + for (j = 1;j < nb_row;j++) { + backward[i][j] = forward[i][j]; + + if (backward[i][j] > 0.) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + if (categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]] > 0.) { + entropy2 -= backward[i][j] * log(categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]); + } + } + + else if (discrete_parametric_process[k]) { + if (discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]] > 0.) 
{ + entropy2 -= backward[i][j] * log(discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]); + } + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1])); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1])); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2)); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2)); + break; + } + } + } + } + } + +// backward_state_entropy[i][j] = 0.; + } + + for (i = seq.length[index] - 2;i >= 0;i--) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]--; + break; + case REAL_VALUE : + proutput[j]--; + break; + } + } + + for (j = 1;j < nb_row;j++) { + if (predicted[i + 1][j] > 0.) 
{ + auxiliary[j] = backward[i + 1][j] / predicted[i + 1][j]; + } + else { + auxiliary[j] = 0.; + } + } + + for (j = 1;j < nb_row;j++) { + backward[i][j] = 0.; +// backward_state_entropy[i][j] = 0.; + + if (next[j]) { +// norm = 0.; + + for (k = 0;k < nb_state;k++) { +/* transition_predicted[k] = auxiliary[next[j][k]] * transition[j][k]; + norm += transition_predicted[k]; */ + + buff = auxiliary[next[j][k]] * transition[j][k] * forward[i][j]; + backward[i][j] += buff; + transition_entropy[j][k] += buff; + +/* if (transition[j][k] > 0.) { + entropy2 -= buff * log(transition[j][k]); + } */ + } + +/* if (norm > 0.) { + for (k = 0;k < nb_state;k++) { + if (transition_predicted[k] > 0.) { + buff = transition_predicted[k] / norm; + backward_state_entropy[i][j] += buff * (backward_state_entropy[i + 1][next[j][k]] - log(buff)); + } + } + + if (backward_state_entropy[i][j] < 0.) { + backward_state_entropy[i][j] = 0.; + } + } */ + + if (backward[i][j] > 0.) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + if (categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]] > 0.) { + entropy2 -= backward[i][j] * log(categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]); + } + } + + else if (discrete_parametric_process[k]) { + if (discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]] > 0.) 
{ + entropy2 -= backward[i][j] * log(discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]); + } + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1])); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1])); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2)); + break; + case REAL_VALUE : + entropy2 -= backward[i][j] * log(continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2)); + break; + } + } + } + } + } + } + } + } + + for (i = 1;i < nb_row;i++) { + switch (type) { + + case ORDINARY : { + if ((order[i] == 1) && (initial[state[i][0]] > 0.)) { + entropy2 -= backward[0][i] * log(initial[state[i][0]]); + } + break; + } + + case EQUILIBRIUM : { + if ((!child[i]) && (initial[i] > 0.)) { + entropy2 -= backward[0][i] * log(initial[i]); + } + break; + } + } + } + + for (i = 1;i < nb_row;i++) { + for (j = 0;j < nb_state;j++) { + if (transition[i][j] > 0.) 
{ + entropy2 -= transition_entropy[i][j] * log(transition[i][j]); + } + } + } + + entropy2 += seq_likelihood; + +# ifdef MESSAGE + if ((entropy2 < entropy1 - DOUBLE_ERROR) || (entropy2 > entropy1 + DOUBLE_ERROR)) { + cout << "\nERROR: " << entropy1 << " " << entropy2 << endl; + } +# endif + + for (i = 0;i < seq.length[index];i++) { + begin_partial_entropy[i] = 0.; + for (j = 1;j < nb_row;j++) { + if (backward[i][j] > 0.) { + begin_partial_entropy[i] += backward[i][j] * (forward_state_entropy[i][j] - log(backward[i][j])); + } + } + if (begin_partial_entropy[i] < 0.) { + begin_partial_entropy[i] = 0.; + } + } + + begin_conditional_entropy[0] = 0.; + for (i = 1;i < nb_row;i++) { + if (backward[0][i] > 0.) { + begin_conditional_entropy[0] -= backward[0][i] * log(backward[0][i]); + } + } + if (begin_conditional_entropy[0] < 0.) { + begin_conditional_entropy[0] = 0.; + } + + for (i = 1;i < seq.length[index];i++) { + begin_conditional_entropy[i] = 0.; + for (j = 1;j < nb_row;j++) { + for (k = 0;k < nb_memory[j];k++) { + if ((predicted[i][j] > 0.) && (backward[i - 1][previous[j][k]] > 0.)) { + buff = backward[i][j] * transition[previous[j][k]][state[j][0]] * + forward[i - 1][previous[j][k]] / predicted[i][j]; + if (buff > 0.) { + begin_conditional_entropy[i] -= buff * log(buff / backward[i - 1][previous[j][k]]); + } + } + } + } + if (begin_conditional_entropy[i] < 0.) { + begin_conditional_entropy[i] = 0.; + } + } + +/* for (i = 0;i < seq.length[index];i++) { + end_partial_entropy[i] = begin_partial_entropy[seq.length[index] - 1]; + for (j = 1;j < nb_row;j++) { + if (backward[i][j] > 0.) { + end_partial_entropy[i - order[j] + 1] -= backward[i][j] * forward_state_entropy[i][j]; + } + } + if (end_partial_entropy[i] < 0.) { + end_partial_entropy[i] = 0.; + } + } + + for (i = 0;i < seq.length[index];i++) { + end_partial_entropy[i] = 0.; + } + + for (i = 0;i < seq.length[index] - 1;i++) { + for (j = 1;j < nb_row;j++) { + if (backward[i][j] > 0.) 
{ + end_partial_entropy[i - order[j] + 1] += backward[i][j] * (backward_state_entropy[i][j] - log(backward[i][j])); + } + } + } + + i = seq.length[index] - 1; + for (j = 1;j < nb_row;j++) { + if (end_backward[j] > 0.) { + end_partial_entropy[i - order[j] + 1] -= end_backward[j] * log(end_backward[j]); + } + } + + for (i = 0;i < seq.length[index];i++) { + if (end_partial_entropy[i] < 0.) { + end_partial_entropy[i] = 0.; + } + } */ + + for (i = 0;i < seq.length[index];i++) { + end_conditional_entropy[i] = 0.; + } + + for (i = 0;i < seq.length[index] - 1;i++) { + for (j = 1;j < nb_row;j++) { + if ((next[j]) && (i - order[j] + 1 >= 0)) { + for (k = 0;k < nb_state;k++) { + if (predicted[i + 1][next[j][k]] > 0.) { + buff = transition[j][k] * forward[i][j] / predicted[i + 1][next[j][k]]; + if (buff > 0.) { + end_conditional_entropy[i - order[j] + 1] -= (backward[i + 1][next[j][k]] * buff) * log(buff); + } + } + } + } + } + } + + i = seq.length[index] - 1; + end_backward[0] = 0.; + for (j = 1;j < nb_row;j++) { + end_backward[j] = backward[i][j]; + } + for (j = nb_row - 1;j >= 1;j--) { + end_backward[parent[j]] += end_backward[j]; + } + +# ifdef DEBUG + cout << "\nTEST sum to 1: " << end_backward[0] << endl; +# endif + + for (j = 1;j < nb_row;j++) { + if ((i - order[j] + 1 >= 0) && (end_backward[j] > 0.) && (end_backward[parent[j]] > 0.)) { + end_conditional_entropy[i - order[j] + 1] -= end_backward[j] * log(end_backward[j] / end_backward[parent[j]]); + } + } + + for (i = 0;i < seq.length[index];i++) { + if (end_conditional_entropy[i] < 0.) 
{ + end_conditional_entropy[i] = 0.; + } + } + +# ifdef MESSAGE + buff = begin_conditional_entropy[0]; + if ((buff < begin_partial_entropy[0] - DOUBLE_ERROR) || (buff > begin_partial_entropy[0] + DOUBLE_ERROR)) { + cout << "\nERROR: " << 0 << " | " << buff << " " << begin_partial_entropy[0] << endl; + } + for (i = 1;i < seq.length[index];i++) { + buff += begin_conditional_entropy[i]; + if ((buff < begin_partial_entropy[i] - DOUBLE_ERROR) || (buff > begin_partial_entropy[i] + DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " | " << buff << " " << begin_partial_entropy[i] << endl; + } + } + +/* i = seq.length[index] - 1; + buff = end_conditional_entropy[i]; + if ((buff < end_partial_entropy[i] - DOUBLE_ERROR) || (buff > end_partial_entropy[i] + DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " | " << buff << " " << end_partial_entropy[i] << endl; + } + for (i = seq.length[index] - 2;i >= 0;i--) { + buff += end_conditional_entropy[i]; + if ((buff < end_partial_entropy[i] - DOUBLE_ERROR) || (buff > end_partial_entropy[i] + DOUBLE_ERROR)) { + cout << "\nERROR: " << i << " | " << buff << " " << end_partial_entropy[i] << endl; + } + } */ +# endif + + // restoration of the most probable state sequence + + pstate = seq.int_sequence[index][0]; + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + state_backward[i][j] = 0.; + } + for (j = 1;j < nb_row;j++) { + state_backward[i][state[j][0]] += backward[i][j]; + } + } + + for (i = 0;i < seq.length[index];i++) { + backward_max = 0.; + for (j = 0;j < nb_state;j++) { + if (state_backward[i][j] > backward_max) { + backward_max = state_backward[i][j]; + *pstate = j; + } + } + + pstate++; + } + + seq.min_value[0] = 0; + seq.max_value[0] = nb_state - 1; + seq.build_marginal_frequency_distribution(0); + + state_seq_likelihood = VariableOrderMarkov::likelihood_computation(seq , index); + +/* begin_conditional_entropy[0] = begin_partial_entropy[0]; + for (i = 1;i < seq.length[index];i++) { + 
begin_conditional_entropy[i] = begin_partial_entropy[i] - begin_partial_entropy[i - 1]; + } */ + + begin_partial_entropy[0] = begin_conditional_entropy[0]; + for (i = 1;i < seq.length[index];i++) { + begin_partial_entropy[i] = begin_partial_entropy[i - 1] + begin_conditional_entropy[i]; + } + + end_partial_entropy[seq.length[index] - 1] = end_conditional_entropy[seq.length[index] - 1]; + for (i = seq.length[index] - 2;i >= 0;i--) { + end_partial_entropy[i] = end_partial_entropy[i + 1] + end_conditional_entropy[i]; + } + + max_marginal_entropy = 0.; + for (i = 0;i < seq.length[index];i++) { + marginal_entropy[i] = 0.; +/* for (j = 0;j < nb_state;j++) { + if (state_backward[i][j] > 0.) { + marginal_entropy[i] -= state_backward[i][j] * log(state_backward[i][j]); + } + } */ + for (j = 1;j < nb_row;j++) { + if (backward[i][j] > 0.) { + marginal_entropy[i] -= backward[i][j] * log(backward[i][j]); + } + } + if (marginal_entropy[i] > max_marginal_entropy) { + max_marginal_entropy = marginal_entropy[i]; + } + if (marginal_entropy[i] < 0.) 
{ + marginal_entropy[i] = 0.; + } + } + + switch (format) { + + case ASCII : { + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; +// seq.profile_ascii_print(*os , index , nb_state , state_backward , +// STAT_label[STATL_STATE]); + seq.profile_ascii_print(*os , index , nb_state , state_backward , begin_conditional_entropy , + marginal_entropy , begin_partial_entropy , end_conditional_entropy , + end_partial_entropy); + + *os << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << seq_likelihood + << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << ": " << state_seq_likelihood + << " (" << exp(state_seq_likelihood - seq_likelihood) << ")" << endl; + break; + } + + case SPREADSHEET : { + *os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; +// seq.profile_spreadsheet_print(*os , index , nb_state , state_backward , +// STAT_label[STATL_STATE]); + seq.profile_spreadsheet_print(*os , index , nb_state , state_backward , begin_conditional_entropy , + marginal_entropy , begin_partial_entropy , end_conditional_entropy , + end_partial_entropy); + + *os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << seq_likelihood + << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << "\t" << state_seq_likelihood + << "\t" << exp(state_seq_likelihood - seq_likelihood) << endl; + break; + } + + case GNUPLOT : { +// seq.profile_plot_print(*os , index , nb_state , state_backward); + seq.profile_plot_print(*os , index , nb_state , state_backward , begin_conditional_entropy , + marginal_entropy , begin_partial_entropy , end_conditional_entropy , + end_partial_entropy); + break; + } + + case PLOT : { + seq.profile_plotable_write((*plot_set)[1] , index , nb_state , state_backward); + seq.entropy_profile_plotable_write((*plot_set)[2] , index , begin_conditional_entropy , + end_conditional_entropy , marginal_entropy); + seq.entropy_profile_plotable_write((*plot_set)[3] , index , begin_partial_entropy , + end_partial_entropy); + break; + } + } + + if (format 
!= GNUPLOT) { +/* double gini_index; + + gini_index = 0.; + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + gini_index += state_backward[i][j] * (1. - state_backward[i][j]); + } + } */ + + double entropy3 , observation , nb_state_sequence; + + entropy3 = 0.; + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (state_backward[i][j] > 0.) { + entropy3 -= state_backward[i][j] * log(state_backward[i][j]); + } + } + } + + // computation of the number of state sequences + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + + // forward recurrence + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + forward[0][i] = initial[state[i][0]]; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + forward[0][i] *= categorical_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + forward[0][i] *= discrete_parametric_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= 
continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + } + } + + if (forward[0][i] > 0.) { + forward[0][i] = 1.; + } + } + + else { + forward[0][i] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + forward[0][i] = initial[i]; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + forward[0][i] *= categorical_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + forward[0][i] *= discrete_parametric_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); 
+ break; + } + } + } + } + + if (forward[0][i] > 0.) { + forward[0][i] = 1.; + } + } + + else { + forward[0][i] = 0.; + } + } + break; + } + } + + for (i = 1;i < seq.length[index];i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + + for (j = 1;j < nb_row;j++) { + forward[i][j] = 0.; + for (k = 0;k < nb_memory[j];k++) { + if (transition[previous[j][k]][state[j][0]] > 0.) { + forward[i][j] += forward[i - 1][previous[j][k]]; + } + } + + observation = 1.; + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + observation *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + observation *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + observation *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + observation *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + observation *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + observation *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + if (observation == 0.) 
{ + forward[i][j] = 0.; + } + } + } + + nb_state_sequence = 0.; + i = seq.length[index] - 1; + for (j = 1;j < nb_row;j++) { + nb_state_sequence += forward[i][j]; + } + + switch (format) { + case ASCII : +/* *os << "\n" << SEQ_label[SEQL_GINI_INDEX] << ": " << gini_index << " (" + << gini_index / seq.length[index] */ + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << entropy1 + << " (" << entropy1 / seq.length[index] << ") " << SEQ_label[SEQL_UPPER_BOUND] << ": " + << log((double)nb_state_sequence) << " (" + << log((double)nb_state_sequence) / seq.length[index] + << ")\n" << SEQ_label[SEQL_MARGINAL_ENTROPY_SUM] << ": " << entropy3 << " (" + << entropy3 / seq.length[index] << ")\n\n" + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << ": " << nb_state_sequence << endl; + break; + case SPREADSHEET : +/* *os << "\n" << SEQ_label[SEQL_GINI_INDEX] << "\t" << gini_index << "\t" + << gini_index / seq.length[index] */ + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << "\t" << entropy1 + << "\t" << entropy1 / seq.length[index] << "\t" << SEQ_label[SEQL_UPPER_BOUND] << "\t" + << log((double)nb_state_sequence) << "\t" + << log((double)nb_state_sequence) / seq.length[index] + << "\n" << SEQ_label[SEQL_MARGINAL_ENTROPY_SUM] << "\t" << entropy3 << "\t" + << entropy3 / seq.length[index] << "\n\n" + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << "\t" << nb_state_sequence << endl; + break; + } + } + } + + for (i = 0;i < seq.length[index];i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 0;i < seq.length[index];i++) { + delete [] predicted[i]; + } + delete [] predicted; + + for (i = 0;i < seq.length[index];i++) { + delete [] backward[i]; + } + delete [] backward; + + delete [] auxiliary; + + for (i = 0;i < seq.length[index];i++) { + delete [] state_backward[i]; + } + delete [] state_backward; + + delete [] transition_predicted; + + for (i = 0;i < seq.length[index];i++) { + delete [] forward_state_entropy[i]; + } + delete [] forward_state_entropy; + + for (i 
= 1;i < nb_row;i++) { + delete [] transition_entropy[i]; + } + delete [] transition_entropy; + + delete [] begin_partial_entropy; + delete [] begin_conditional_entropy; + +/* for (i = 0;i < seq.length[index];i++) { + delete [] backward_state_entropy[i]; + } + delete [] backward_state_entropy; */ + + delete [] end_backward; + delete [] end_partial_entropy; + delete [] end_conditional_entropy; + + delete [] marginal_entropy; + + delete [] pioutput; + delete [] proutput; + + return seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation of state sequences for an observed sequence using + * the forward-backward algorithm for sampling. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] nb_state_sequence number of state sequences. + * + * \return log-likelihood for the observed sequence. 
+ */ +/*--------------------------------------------------------------*/ + +double HiddenVariableOrderMarkov::forward_backward_sampling(const MarkovianSequences &seq , int index , + ostream &os , output_format format , + int nb_state_sequence) const + +{ + int i , j , k; + int memory , *pstate , **pioutput; + double seq_likelihood , state_seq_likelihood , **forward , norm , **predicted , + *backward , *cumul_backward , **proutput; + +# ifdef DEBUG + double sum; +# endif + + + // initializations + + forward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward[i] = new double[nb_row]; + } + + predicted = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + predicted[i] = new double[nb_row]; + } + + backward = new double[nb_row]; + cumul_backward = new double[nb_row]; + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + +# ifdef DEBUG + double **state_sequence_probability; + + + state_sequence_probability = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_sequence_probability[i] = new double[nb_state]; + for (j = 0;j < nb_state;j++) { + state_sequence_probability[i][j] = 0.; + } + } +# endif + + // forward recurrence + + seq_likelihood = 0.; + norm = 0.; + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + forward[0][i] = initial[state[i][0]]; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + forward[0][i] *= categorical_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + forward[0][i] *= discrete_parametric_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else { + 
if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + } + } + + norm += forward[0][i]; + } + + else { + forward[0][i] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + forward[0][i] = initial[i]; + + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + forward[0][i] *= categorical_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + forward[0][i] *= discrete_parametric_process[j]->observation[state[i][0]]->mass[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case 
REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + forward[0][i] *= continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + } + } + + norm += forward[0][i]; + } + + else { + forward[0][i] = 0.; + } + } + break; + } + } + + if (norm > 0.) { + for (i = 1;i < nb_row;i++) { + forward[0][i] /= norm; + } + + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + } + + if (seq_likelihood != D_INF) { + for (i = 1;i < seq.length[index];i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + norm = 0.; + + for (j = 1;j < nb_row;j++) { + forward[i][j] = 0.; + for (k = 0;k < nb_memory[j];k++) { + forward[i][j] += transition[previous[j][k]][state[j][0]] * forward[i - 1][previous[j][k]]; + } + predicted[i][j] = forward[i][j]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + forward[i][j] *= categorical_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + forward[i][j] *= discrete_parametric_process[k]->observation[state[j][0]]->mass[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[i][j] *= 
continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + forward[i][j] *= continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + } + } + + norm += forward[i][j]; + } + + if (norm > 0.) { + for (j = 1;j < nb_row;j++) { + forward[i][j] /= norm; + } + seq_likelihood += log(norm); + } + + else { + seq_likelihood = D_INF; + break; + } + } + } + + if (seq_likelihood != D_INF) { + +# ifdef MESSAGE + cout << "\n"; +# endif + + // backward passes + + for (i = 0;i < nb_state_sequence;i++) { + j = seq.length[index] - 1; + pstate = seq.int_sequence[index][0] + j; + stat_tool::cumul_computation(nb_row - 1 , forward[j] + 1 , cumul_backward); + memory = 1 + cumul_method(nb_row - 1 , cumul_backward); + *pstate = state[memory][0]; + + for (j = seq.length[index] - 2;j >= 0;j--) { + for (k = 0;k < nb_memory[memory];k++) { + backward[k] = transition[previous[memory][k]][state[memory][0]] * + forward[j][previous[memory][k]] / predicted[j + 1][memory]; + } + +# ifdef DEBUG + sum = 0.; + for (k = 0;k < nb_memory[memory];k++) { + sum += backward[k]; + } + if ((sum < 1. - DOUBLE_ERROR) || (sum > 1. 
+ DOUBLE_ERROR)) { + cout << "\nERROR: " << j << " " << sum << endl; + } +# endif + + stat_tool::cumul_computation(nb_memory[memory] , backward , cumul_backward); + memory = previous[memory][cumul_method(nb_memory[memory] , cumul_backward)]; + *--pstate = state[memory][0]; + } + +# ifdef DEBUG + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { + state_sequence_probability[j][*pstate++]++; + } +# endif + +# ifdef MESSAGE + state_seq_likelihood = VariableOrderMarkov::likelihood_computation(seq , index); + + pstate = seq.int_sequence[index][0]; + + switch (format) { + + case ASCII : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << " "; + } + + os << " " << i + 1 << " " << state_seq_likelihood + << " (" << exp(state_seq_likelihood - seq_likelihood) << ")" << endl; + break; + } + + case SPREADSHEET : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << "\t"; + } + + os << "\t" << i + 1 << "\t" << state_seq_likelihood + << "\t" << exp(state_seq_likelihood - seq_likelihood) << endl; + break; + } + } +# endif + + } + +# ifdef DEBUG + if (nb_state_sequence >= 1000) { + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + state_sequence_probability[i][j] /= nb_state_sequence; + } + } + + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { + *pstate++ = I_DEFAULT; + } + + os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + seq.profile_ascii_print(os , index , nb_state , state_sequence_probability , + STAT_label[STATL_STATE]); + } +# endif + + } + + for (i = 0;i < seq.length[index];i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 0;i < seq.length[index];i++) { + delete [] predicted[i]; + } + delete [] predicted; + + delete [] backward; + delete [] cumul_backward; + + delete [] pioutput; + delete [] proutput; + +# ifdef DEBUG + for (i = 0;i < seq.length[index];i++) { + delete [] state_sequence_probability[i]; + } + delete [] 
state_sequence_probability; +# endif + + return seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-parameters of a hidden variable-order Markov chain. + */ +/*--------------------------------------------------------------*/ + +void HiddenVariableOrderMarkov::log_computation() + +{ + int i , j; + + + Chain::log_computation(); + + for (i = 0;i < nb_output_process;i++) { + if (categorical_process[i]) { + for (j = 0;j < nb_state;j++) { + categorical_process[i]->observation[j]->log_computation(); + } + } + + else if (discrete_parametric_process[i]) { + for (j = 0;j < nb_state;j++) { + stat_tool::log_computation(discrete_parametric_process[i]->nb_value , + discrete_parametric_process[i]->observation[j]->mass , + discrete_parametric_process[i]->observation[j]->cumul); + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the most probable state sequences using the Viterbi algorithm. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] posterior_probability pointer on the posterior probabilities of the most probable state sequences, + * \param[in] index sequence index. + * + * \return log-likelihood for the most probable state sequences. + */ +/*--------------------------------------------------------------*/ + +double HiddenVariableOrderMarkov::viterbi(const MarkovianSequences &seq , + double *posterior_probability , int index) const + +{ + int i , j , k , m; + int length , memory , *pstate , **pioutput , **optimal_memory; + double likelihood = 0. , buff , forward_max , *forward , *previous_forward , **proutput; + + + // initializations + + forward = new double[nb_row]; + previous_forward = new double[nb_row]; + + length = (index == I_DEFAULT ? 
seq.max_length : seq.length[index]); + + optimal_memory = new int*[length]; + for (i = 0;i < length;i++) { + optimal_memory[i] = new int[nb_row]; + } + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < seq.nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + + // forward recurrence + + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + forward[j] = cumul_initial[state[j][0]]; + + if (forward[j] != D_INF) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + buff = categorical_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + buff = discrete_parametric_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - 
seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[j] = D_INF; + break; + } + else { + forward[j] += buff; + } + } + } + } + + else { + forward[j] = D_INF; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + forward[j] = cumul_initial[j]; + + if (forward[j] != D_INF) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + buff = categorical_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + buff = discrete_parametric_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) 
{ + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[j] = D_INF; + break; + } + else { + forward[j] += buff; + } + } + } + } + + else { + forward[j] = D_INF; + } + } + break; + } + } + +# ifdef DEBUG + cout << "\n" << 0 << " : "; + for (j = 1;j < nb_row;j++) { + cout << forward[j] << " | "; + } + cout << endl; +# endif + + for (j = 1;j < seq.length[i];j++) { + for (k = 0;k < nb_output_process;k++) { + switch (seq.type[k + 1]) { + case INT_VALUE : + pioutput[k]++; + break; + case REAL_VALUE : + proutput[k]++; + break; + } + } + + for (k = 1;k < nb_row;k++) { + previous_forward[k] = forward[k]; + } + + for (k = 1;k < nb_row;k++) { + forward[k] = D_INF; + for (m = 0;m < nb_memory[k];m++) { + buff = cumul_transition[previous[k][m]][state[k][0]] + previous_forward[previous[k][m]]; + if (buff > forward[k]) { + forward[k] = buff; + optimal_memory[j][k] = previous[k][m]; + } + } + + if (forward[k] != D_INF) { + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + buff = categorical_process[m]->observation[state[k][0]]->cumul[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + buff = discrete_parametric_process[m]->observation[state[k][0]]->cumul[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*pioutput[m] - 
seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[m]->observation[state[k][0]]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[k] = D_INF; + break; + } + else { + forward[k] += buff; + } + } + } + } + +# ifdef DEBUG + cout << j << " : "; + for (k = 1;k < nb_row;k++) { + cout << forward[k] << " " << optimal_memory[j][k] << " | "; + } + cout << endl; +# endif + + } + + // extraction of the log-likelihood for the most probable state sequence + + pstate = seq.int_sequence[i][0] + seq.length[i] - 1; + forward_max = D_INF; + + for (j = 1;j < nb_row;j++) { + if (forward[j] > forward_max) { + forward_max = forward[j]; + memory = j; + } + } + + if (forward_max != D_INF) { + likelihood += forward_max; + *pstate = state[memory][0]; + if (posterior_probability) { + posterior_probability[i] = forward_max; + } + } + + else { + likelihood = D_INF; + if (posterior_probability) { + posterior_probability[i] = 0.; + } + break; + } + + // restoration of the most probable state sequence + + for (j = seq.length[i] - 1;j > 0;j--) { + memory = optimal_memory[j][memory]; + *--pstate = state[memory][0]; + } + +# ifdef DEBUG + cout << "\n"; + for (j = seq.length[i] - 1;j >= 0;j--) { + cout << seq.int_sequence[i][0][j] << " "; + } + cout << endl; +# endif + + } + } + + delete [] forward; + delete [] previous_forward; + + for (i = 0;i < length;i++) { + delete [] optimal_memory[i]; + } + delete [] optimal_memory; + + delete [] pioutput; + delete [] proutput; + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the most probable state sequences using the Viterbi algorithm. + * + * \param[in] seq reference on a VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +void HiddenVariableOrderMarkov::viterbi(VariableOrderMarkovData &seq) const + +{ + seq.posterior_probability = new double[seq.nb_sequence]; + seq.restoration_likelihood = viterbi(seq , seq.posterior_probability); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the N most probable state sequences for + * an observed sequence using the generalized Viterbi algorithm. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] seq_likelihood log-likelihood for the observed sequence, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] inb_state_sequence number of state sequences. + * + * \return log-likelihood for the most probable state sequence. + */ +/*--------------------------------------------------------------*/ + +double HiddenVariableOrderMarkov::generalized_viterbi(const MarkovianSequences &seq , int index , + ostream &os , double seq_likelihood , + output_format format , int inb_state_sequence) const + +{ + bool **active_cell; + int i , j , k , m; + int nb_state_sequence , memory , brank , previous_rank , nb_cell , *rank , *pstate , + **pioutput , ***optimal_memory , ***optimal_rank; + double buff , observation , forward_max , state_seq_likelihood , likelihood_cumul , + **forward , **previous_forward , **proutput; + + + // initializations + + forward = new double*[nb_row]; + forward[0] = NULL; + for (i = 1;i < nb_row;i++) { + forward[i] = new double[inb_state_sequence]; + } + + previous_forward = new double*[nb_row]; + previous_forward[0] = NULL; + for (i = 1;i < nb_row;i++) { + previous_forward[i] = new double[inb_state_sequence]; + for (j = 1;j < inb_state_sequence;j++) { + previous_forward[i][j] = D_INF; + } + } + + rank = new int[nb_row]; + + optimal_memory = new int**[seq.length[index]]; + for (i = 0;i < 
seq.length[index];i++) { + optimal_memory[i] = new int*[nb_row]; + optimal_memory[i][0] = NULL; + for (j = 1;j < nb_row;j++) { + optimal_memory[i][j] = new int[inb_state_sequence]; + } + } + + optimal_rank = new int**[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + optimal_rank[i] = new int*[nb_row]; + optimal_rank[i][0] = NULL; + for (j = 1;j < nb_row;j++) { + optimal_rank[i][j] = new int[inb_state_sequence]; + } + } + + active_cell = new bool*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + active_cell[i] = new bool[nb_state]; + for (j = 0;j < nb_state;j++) { + active_cell[i][j] = false; + } + } + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + +# ifdef DEBUG + double entropy = 0. , **state_sequence_probability; + + + state_sequence_probability = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_sequence_probability[i] = new double[nb_state]; + for (j = 0;j < nb_state;j++) { +// state_sequence_probability[i][j] = 0.; + state_sequence_probability[i][j] = D_INF; + } + } +# endif + + // forward recurrence + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + forward[i][0] = cumul_initial[state[i][0]]; + + if (forward[i][0] != D_INF) { + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + buff = categorical_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + buff = discrete_parametric_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < 
seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[i][0] = D_INF; + break; + } + else { + forward[i][0] += buff; + } + } + } + } + + else { + forward[i][0] = D_INF; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + forward[i][0] = cumul_initial[i]; + + if (forward[i][0] != D_INF) { + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + buff = categorical_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + buff = discrete_parametric_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + buff = 
continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[i][0] = D_INF; + break; + } + else { + forward[i][0] += buff; + } + } + } + } + + else { + forward[i][0] = D_INF; + } + } + break; + } + } + + nb_state_sequence = 1; + + for (i = 1;i < seq.length[index];i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + + for (j = 1;j < nb_row;j++) { + for (k = 0;k < nb_state_sequence;k++) { + previous_forward[j][k] = forward[j][k]; + } + } + + if (nb_state_sequence < inb_state_sequence) { + if (nb_state_sequence * nb_state < inb_state_sequence) { + nb_state_sequence *= nb_state; + } + else { + nb_state_sequence = inb_state_sequence; + } + } + + for (j = 1;j < nb_row;j++) { + observation = 0.; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + buff = categorical_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + buff = discrete_parametric_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch 
(seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + observation = D_INF; + break; + } + else { + observation += buff; + } + } + + for (k = 1;k < nb_row;k++) { + rank[k] = 0; + } + + for (k = 0;k < nb_state_sequence;k++) { + forward[j][k] = D_INF; + for (m = 0;m < nb_memory[j];m++) { + buff = cumul_transition[previous[j][m]][state[j][0]] + + previous_forward[previous[j][m]][rank[previous[j][m]]]; + if (buff > forward[j][k]) { + forward[j][k] = buff; + optimal_memory[i][j][k] = previous[j][m]; + optimal_rank[i][j][k] = rank[previous[j][m]]; + } + } + + if (forward[j][k] != D_INF) { + rank[optimal_memory[i][j][k]]++; + +/* for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + buff = categorical_process[m]->observation[state[j][0]]->cumul[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + buff = discrete_parametric_process[m]->observation[state[j][0]]->cumul[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) 
{ + switch (seq.type[m + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[m]->observation[state[j][0]]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[m]->observation[state[j][0]]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[m]->observation[state[j][0]]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[m]->observation[state[j][0]]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[j][k] = D_INF; + break; + } + else { + forward[j][k] += buff; + } + } */ + + if (observation == D_INF) { + forward[j][k] = D_INF; + break; + } + else { + forward[j][k] += observation; + } + } + } + } + } + + // extraction of the log-likelihood for the most probable state sequence + + for (i = 1;i < nb_row;i++) { + rank[i] = 0; + } + likelihood_cumul = 0.; + + for (i = 0;i < nb_state_sequence;i++) { + pstate = seq.int_sequence[index][0] + seq.length[index] - 1; + forward_max = D_INF; + + for (j = 1;j < nb_row;j++) { + if (forward[j][rank[j]] > forward_max) { + forward_max = forward[j][rank[j]]; + memory = j; + } + } + + if (i == 0) { + state_seq_likelihood = forward_max; + } + + if (forward_max == D_INF) { + break; + } + + // restoration of the most probable state sequence + + *pstate = state[memory][0]; + active_cell[seq.length[index] - 1][*pstate] = true; + brank = rank[memory]; + rank[memory]++; + +# ifdef DEBUG + cout << "\n" << *pstate << " " << brank << " | "; +# endif + + for (j = seq.length[index] - 1;j > 0;j--) { + 
previous_rank = optimal_rank[j][memory][brank]; + memory = optimal_memory[j][memory][brank]; + *--pstate = state[memory][0]; + active_cell[j - 1][*pstate] = true; + brank = previous_rank; + +# ifdef DEBUG + cout << *pstate << " " << brank << " | "; +# endif + } + +# ifdef DEBUG + cout << endl; +# endif + + likelihood_cumul += exp(forward_max); + +# ifdef DEBUG + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { +/* state_sequence_probability[j][*pstate++] += exp(forward_max - seq_likelihood); */ + + if (forward_max > state_sequence_probability[j][*pstate]) { + state_sequence_probability[j][*pstate] = forward_max; + } + pstate++; + } +# endif + + nb_cell = 0; + for (j = 0;j < seq.length[index];j++) { + for (k = 0;k < nb_state;k++) { + if (active_cell[j][k]) { + nb_cell++; + } + } + } + +# ifdef MESSAGE + if (i == 0) { + os << "\n"; + } + + pstate = seq.int_sequence[index][0]; + + switch (format) { + + case ASCII : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << " "; + } + +// os << " " << i + 1 << " " << forward_max << " (" << exp(forward_max - state_seq_likelihood) + os << " " << i + 1 << " " << forward_max << " (" << exp(forward_max - seq_likelihood) + << " " << likelihood_cumul / exp(seq_likelihood) << " " << nb_cell << ")" << endl; + break; + } + + case SPREADSHEET : { + for (j = 0;j < seq.length[index];j++) { + os << *pstate++ << "\t"; + } + +// os << "\t" << i + 1 << "\t" << forward_max << "\t" << exp(forward_max - state_seq_likelihood) + os << "\t" << i + 1 << "\t" << forward_max << "\t" << exp(forward_max - seq_likelihood) + << "\t" << likelihood_cumul / exp(seq_likelihood) << "\t" << nb_cell << endl; + break; + } + } +# endif + +# ifdef DEBUG + entropy -= exp(forward_max - seq_likelihood) * forward_max; +# endif + + } + +# ifdef DEBUG + os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << entropy + seq_likelihood << endl; + + if (likelihood_cumul / exp(seq_likelihood) > 0.8) { + for (i = 0;i < 
seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (state_sequence_probability[i][j] != D_INF) { + state_sequence_probability[i][j] = exp(state_sequence_probability[i][j] - seq_likelihood); + } + else { + state_sequence_probability[i][j] = 0.; + } + } + } + + pstate = seq.int_sequence[index][0]; + for (j = 0;j < seq.length[index];j++) { + *pstate++ = I_DEFAULT; + } + +// os << "\n" << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + seq.profile_ascii_print(os , index , nb_state , state_sequence_probability , + STAT_label[STATL_STATE]); + } +# endif + + for (i = 1;i < nb_row;i++) { + delete [] forward[i]; + } + delete [] forward; + + for (i = 1;i < nb_row;i++) { + delete [] previous_forward[i]; + } + delete [] previous_forward; + + delete [] rank; + + for (i = 0;i < seq.length[index];i++) { + for (j = 1;j < nb_row;j++) { + delete [] optimal_memory[i][j]; + } + delete [] optimal_memory[i]; + } + delete [] optimal_memory; + + for (i = 0;i < seq.length[index];i++) { + for (j = 1;j < nb_row;j++) { + delete [] optimal_rank[i][j]; + } + delete [] optimal_rank[i]; + } + delete [] optimal_rank; + + for (i = 0;i < seq.length[index];i++) { + delete [] active_cell[i]; + } + delete [] active_cell; + + delete [] pioutput; + delete [] proutput; + +# ifdef DEBUG + for (i = 0;i < seq.length[index];i++) { + delete [] state_sequence_probability[i]; + } + delete [] state_sequence_probability; +# endif + + return state_seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state profiles using the Viterbi forward-backward algorithm. 
+ * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index, + * \param[in] os stream, + * \param[in] plot pointer on a MultiPlot object, + * \param[in] format output format (ASCII/SPREADSHEET/GNUPLOT/PLOT), + * \param[in] seq_likelihood log-likelihood for the observed sequence. + * + * \return log-likelihood for the most probable state sequence. + */ +/*--------------------------------------------------------------*/ + +double HiddenVariableOrderMarkov::viterbi_forward_backward(const MarkovianSequences &seq , int index , + ostream *os , MultiPlot *plot , + output_format format , double seq_likelihood) const + +{ + int i , j , k; + int *pstate , **pioutput; + double buff , state_seq_likelihood , backward_max , **forward , **backward , *auxiliary , + **state_backward , **proutput; + + + // initializations + + forward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + forward[i] = new double[nb_row]; + } + + backward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + backward[i] = new double[nb_row]; + } + + auxiliary = new double[nb_row]; + + state_backward = new double*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + state_backward[i] = new double[nb_state]; + } + + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + +# ifdef MESSAGE + int memory , *state_sequence , **optimal_memory; + + optimal_memory = new int*[seq.length[index]]; + for (i = 0;i < seq.length[index];i++) { + optimal_memory[i] = new int[nb_row]; + } + + state_sequence = new int[seq.length[index]]; +# endif + + for (i = 0;i < nb_output_process;i++) { + switch (seq.type[i + 1]) { + case INT_VALUE : + pioutput[i] = seq.int_sequence[index][i + 1]; + break; + case REAL_VALUE : + proutput[i] = seq.real_sequence[index][i + 1]; + break; + } + } + + // forward recurrence + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) 
{ + forward[0][i] = cumul_initial[state[i][0]]; + + if (forward[0][i] != D_INF) { + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + buff = categorical_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + else if (discrete_parametric_process[j]) { + buff = discrete_parametric_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + + if (buff > 0.) 
{ + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[0][i] = D_INF; + break; + } + else { + forward[0][i] += buff; + } + } + } + } + + else { + forward[0][i] = D_INF; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + forward[0][i] = cumul_initial[i]; + + if (forward[0][i] != D_INF) { + for (j = 0;j < nb_output_process;j++) { + if (categorical_process[j]) { + buff = categorical_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else if (discrete_parametric_process[j]) { + buff = discrete_parametric_process[j]->observation[state[i][0]]->cumul[*pioutput[j]]; + } + + else { + if (((continuous_parametric_process[j]->ident == GAMMA) || + (continuous_parametric_process[j]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[j + 1] < seq.min_interval[j + 1] / 2)) { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] , *pioutput[j] + seq.min_interval[j + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] , *proutput[j] + seq.min_interval[j + 1]); + break; + } + } + + else { + switch (seq.type[j + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[j]->observation[state[i][0]]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2); + break; + } + } + + if (buff > 0.) 
{ + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[0][i] = D_INF; + break; + } + else { + forward[0][i] += buff; + } + } + } + } + + else { + forward[0][i] = D_INF; + } + } + break; + } + } + + for (i = 1;i < seq.length[index];i++) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]++; + break; + case REAL_VALUE : + proutput[j]++; + break; + } + } + + for (j = 1;j < nb_row;j++) { + forward[i][j] = D_INF; + for (k = 0;k < nb_memory[j];k++) { + buff = cumul_transition[previous[j][k]][state[j][0]] + forward[i - 1][previous[j][k]]; + if (buff > forward[i][j]) { + forward[i][j] = buff; + +# ifdef MESSAGE + optimal_memory[i][j] = previous[j][k]; +# endif + } + } + + if (forward[i][j] != D_INF) { + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + buff = categorical_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + buff = discrete_parametric_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = 
continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) { + buff = log(buff); + } + else { + buff = D_INF; + } + } + + if (buff == D_INF) { + forward[i][j] = D_INF; + break; + } + else { + forward[i][j] += buff; + } + } + } + } + } + + // extraction of the log-likelihood for the most probable state sequence + +# ifdef MESSAGE + pstate = state_sequence + seq.length[index] - 1; +# endif + + state_seq_likelihood = D_INF; + i = seq.length[index] - 1; + for (j = 1;j < nb_row;j++) { + if (forward[i][j] > state_seq_likelihood) { + state_seq_likelihood = forward[i][j]; + +# ifdef MESSAGE + memory = j; +# endif + } + } + + if (state_seq_likelihood != D_INF) { + +# ifdef MESSAGE + *pstate = state[memory][0]; + for (i = seq.length[index] - 1;i > 0;i--) { + memory = optimal_memory[i][memory]; + *--pstate = state[memory][0]; + } +# endif + + // backward recurrence + + i = seq.length[index] - 1; + for (j = 1;j < nb_row;j++) { + backward[i][j] = 0.; + } + + for (i = seq.length[index] - 2;i >= 0;i--) { + for (j = 1;j < nb_row;j++) { + auxiliary[j] = backward[i + 1][j]; + + for (k = 0;k < nb_output_process;k++) { + if (categorical_process[k]) { + auxiliary[j] += categorical_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else if (discrete_parametric_process[k]) { + auxiliary[j] += discrete_parametric_process[k]->observation[state[j][0]]->cumul[*pioutput[k]]; + } + + else { + if (((continuous_parametric_process[k]->ident == GAMMA) || + (continuous_parametric_process[k]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[k + 1] < seq.min_interval[k + 1] / 2)) { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] , *pioutput[k] + seq.min_interval[k + 1]); + break; + case REAL_VALUE : + buff = 
continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] , *proutput[k] + seq.min_interval[k + 1]); + break; + } + } + + else { + switch (seq.type[k + 1]) { + case INT_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2); + break; + case REAL_VALUE : + buff = continuous_parametric_process[k]->observation[state[j][0]]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2); + break; + } + } + + if (buff > 0.) { + auxiliary[j] += log(buff); + } + else { + auxiliary[j] = D_INF; + } + } + } + } + + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j]--; + break; + case REAL_VALUE : + proutput[j]--; + break; + } + } + + for (j = 1;j < nb_row;j++) { + backward[i][j] = D_INF; + if (next[j]) { + for (k = 0;k < nb_state;k++) { + buff = auxiliary[next[j][k]] + cumul_transition[j][k]; + if (buff > backward[i][j]) { + backward[i][j] = buff; + } + } + } + } + } + + // restoration of the most probable state sequence + + pstate = seq.int_sequence[index][0]; + + for (i = 0;i < seq.length[index];i++) { + backward_max = D_INF; + for (j = 1;j < nb_row;j++) { + if (backward[i][j] != D_INF) { + if (forward[i][j] != D_INF) { + backward[i][j] += forward[i][j]; + if (backward[i][j] > backward_max) { + backward_max = backward[i][j]; + *pstate = state[j][0]; + } + } + + else { + backward[i][j] = D_INF; + } + } + } + +# ifdef MESSAGE + if (*pstate != state_sequence[i]) { + cout << "\nERROR: " << i << " | " << *pstate << " " << state_sequence[i] << endl; + } +# endif + + pstate++; + } + + // normalization + + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + state_backward[i][j] = D_INF; + } + for (j = 1;j < nb_row;j++) { + if (backward[i][j] > state_backward[i][state[j][0]]) { + state_backward[i][state[j][0]] 
= backward[i][j]; + } + } + + for (j = 0;j < nb_state;j++) { + if (state_backward[i][j] != D_INF) { + state_backward[i][j] = exp(state_backward[i][j] - seq_likelihood); +// state_backward[i][j] = exp(state_backward[i][j] - state_seq_likelihood); + } + else { + state_backward[i][j] = 0.; + } + } + } + + switch (format) { + + case ASCII : { + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + seq.profile_ascii_print(*os , index , nb_state , state_backward , + STAT_label[STATL_STATE]); + + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << ": " << state_seq_likelihood + << " (" << exp(state_seq_likelihood - seq_likelihood) << ")" << endl; + break; + } + + case SPREADSHEET : { + *os << "\n" << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\n\n"; + seq.profile_spreadsheet_print(*os , index , nb_state , state_backward , + STAT_label[STATL_STATE]); + + *os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_LIKELIHOOD] << "\t" << state_seq_likelihood + << "\t" << exp(state_seq_likelihood - seq_likelihood) << endl; + break; + } + + case GNUPLOT : { + seq.profile_plot_print(*os , index , nb_state , state_backward); + break; + } + + case PLOT : { + seq.profile_plotable_write(*plot , index , nb_state , state_backward); + break; + } + } + +# ifdef DEBUG + if (format != GNUPLOT) { + double ambiguity = 0.; + + pstate = seq.int_sequence[index][0]; + for (i = 0;i < seq.length[index];i++) { + for (j = 0;j < nb_state;j++) { + if (j != *pstate) { + ambiguity += state_backward[i][j]; + } + } + pstate++; + } + ambiguity *= exp(seq_likelihood - state_seq_likelihood); + + switch (format) { + case ASCII : + *os << "\n" << SEQ_label[SEQL_AMBIGUITY] << ": " << ambiguity + << " (" << ambiguity / seq.length[index] << ")" << endl; + break; + case SPREADSHEET : + *os << "\n" << SEQ_label[SEQL_AMBIGUITY] << "\t" << ambiguity + << "\t" << ambiguity / seq.length[index] << "\t" << endl; + break; + } + } +# endif + + } + + for (i = 0;i < seq.length[index];i++) { + delete 
[] forward[i]; + } + delete [] forward; + + for (i = 0;i < seq.length[index];i++) { + delete [] backward[i]; + } + delete [] backward; + + delete [] auxiliary; + + for (i = 0;i < seq.length[index];i++) { + delete [] state_backward[i]; + } + delete [] state_backward; + + delete [] pioutput; + delete [] proutput; + +# ifdef MESSAGE + for (i = 0;i < seq.length[index];i++) { + delete [] optimal_memory[i]; + } + delete [] optimal_memory; + + delete [] state_sequence; +# endif + + return state_seq_likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * writing of the results. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] format format (ASCII/SPREADSHEET), + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. 
+ */
+/*--------------------------------------------------------------*/
+
+bool HiddenVariableOrderMarkov::state_profile_write(StatError &error , ostream &os ,
+                                                    const MarkovianSequences &iseq ,
+                                                    int identifier , output_format format ,
+                                                    latent_structure_algorithm state_sequence ,
+                                                    int nb_state_sequence) const
+
+{
+  bool status = true;
+  int offset = I_DEFAULT;   // 0: variables map directly on output processes, 1: a leading STATE variable is present
+  int index = I_DEFAULT;    // I_DEFAULT: all the sequences are processed
+
+
+  error.init();
+
+  // only INT_VALUE, REAL_VALUE and STATE variables are accepted
+
+  for (int var = 0;var < iseq.nb_variable;var++) {
+    const bool accepted_type = ((iseq.type[var] == INT_VALUE) || (iseq.type[var] == REAL_VALUE) ||
+                                (iseq.type[var] == STATE));
+    if (accepted_type) {
+      continue;
+    }
+
+    status = false;
+    ostringstream error_message , correction_message;
+    error_message << STAT_label[STATL_VARIABLE] << " " << var + 1 << ": "
+                  << STAT_error[STATR_VARIABLE_TYPE];
+    correction_message << STAT_variable_word[INT_VALUE] << " or "
+                       << STAT_variable_word[REAL_VALUE];
+    error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str());
+  }
+
+  // matching between the variables and the output processes
+
+  if (nb_output_process == iseq.nb_variable) {
+    offset = 0;
+  }
+  else if ((iseq.type[0] == STATE) && (nb_output_process + 1 == iseq.nb_variable)) {
+    offset = 1;
+  }
+  else {
+    status = false;
+    error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]);
+  }
+
+  // checks specific to the categorical and discrete parametric output processes
+
+  if (offset != I_DEFAULT) {
+    for (int process = 0;process < nb_output_process;process++) {
+      if ((!categorical_process[process]) && (!discrete_parametric_process[process])) {
+        continue;
+      }
+
+      const int var = process + offset;
+
+      if (iseq.type[var] == REAL_VALUE) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_VARIABLE] << " " << var + 1 << ": "
+                      << STAT_error[STATR_VARIABLE_TYPE];
+        error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]);
+        continue;
+      }
+
+      if (iseq.min_value[var] < 0) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_VARIABLE] << " " << var + 1 << ": "
+                      << STAT_error[STATR_POSITIVE_MIN_VALUE];
+        error.update((error_message.str()).c_str());
+      }
+
+      if (!(iseq.marginal_distribution[var])) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_VARIABLE] << " " << var + 1 << ": "
+                      << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION];
+        error.update((error_message.str()).c_str());
+        continue;
+      }
+
+      // the observation distributions must cover every value observed for the variable
+
+      const int nb_value = (categorical_process[process] ? categorical_process[process]->nb_value :
+                                                           discrete_parametric_process[process]->nb_value);
+
+      if (nb_value < iseq.marginal_distribution[var]->nb_value) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << process + 1 << ": "
+                      << STAT_error[STATR_NB_OUTPUT];
+        error.update((error_message.str()).c_str());
+      }
+    }
+  }
+
+  // search of the sequence index corresponding to the identifier
+
+  if (identifier != I_DEFAULT) {
+    int rank;
+
+    for (rank = 0;rank < iseq.nb_sequence;rank++) {
+      if (iseq.identifier[rank] == identifier) {
+        index = rank;
+        break;
+      }
+    }
+
+    if (rank == iseq.nb_sequence) {
+      status = false;
+      error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]);
+    }
+  }
+
+  if (nb_state_sequence < 2) {
+    status = false;
+    error.update(SEQ_error[SEQR_NB_STATE_SEQUENCE]);
+  }
+
+  if (!status) {
+    return status;
+  }
+
+  // working copy of the sequences and log-transformed copy of the model
+
+  VariableOrderMarkovData *seq;
+  if (nb_output_process == iseq.nb_variable) {
+    seq = new VariableOrderMarkovData(iseq);
+  }
+  else {
+    seq = new VariableOrderMarkovData(iseq , SEQUENCE_COPY , (type == EQUILIBRIUM));
+  }
+
+  HiddenVariableOrderMarkov *hmarkov = new HiddenVariableOrderMarkov(*this , false);
+  hmarkov->create_cumul();
+  hmarkov->log_computation();
+
+  for (int s = 0;s < seq->nb_sequence;s++) {
+    if ((index != I_DEFAULT) && (index != s)) {
+      continue;
+    }
+
+    double max_marginal_entropy , entropy;
+    const double seq_likelihood = forward_backward(*seq , s , &os , NULL , format ,
+                                                   max_marginal_entropy , entropy);
+
+    if (seq_likelihood == D_INF) {
+      status = false;
+
+      if (index == I_DEFAULT) {
+        ostringstream error_message;
+        error_message << SEQ_label[SEQL_SEQUENCE] << " " << s + 1 << " "
+                      << SEQ_error[SEQR_INCOMPATIBLE_MODEL];
+        error.update((error_message.str()).c_str());
+      }
+      else {
+        error.update(SEQ_error[SEQR_SEQUENCE_INCOMPATIBLE_MODEL]);
+      }
+
+      continue;
+    }
+
+    hmarkov->viterbi_forward_backward(*seq , s , &os , NULL , format ,
+                                      seq_likelihood);
+
+    if (state_sequence == GENERALIZED_VITERBI) {
+      hmarkov->generalized_viterbi(*seq , s , os , seq_likelihood , format ,
+                                   nb_state_sequence);
+    }
+    else if (state_sequence == FORWARD_BACKWARD_SAMPLING) {
+      forward_backward_sampling(*seq , s , os , format , nb_state_sequence);
+    }
+  }
+
+  delete seq;
+  delete hmarkov;
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of state and entropy profiles using the forward-backward algorithm,
+ *         of state profiles using the Viterbi forward-backward algorithm,
+ *         computation of the N most probable state sequences using the generalized Viterbi algorithm or
+ *         simulation of state sequences using the forward-backward algorithm for sampling and
+ *         displaying the results.
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the state and entropy profiles and the N most probable state sequences, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool HiddenVariableOrderMarkov::state_profile_ascii_write(StatError &error , ostream &os , + const MarkovianSequences &iseq , int identifier , + latent_structure_algorithm state_sequence , + int nb_state_sequence) const + +{ + return state_profile_write(error , os , iseq , identifier , ASCII , + state_sequence , nb_state_sequence); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm, + * computation of the N most probable state sequences using the generalized Viterbi algorithm or + * simulation of state sequences using the forward-backward algorithm for sampling and + * writing of the results in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier, + * \param[in] format file format (ASCII/SPREADSHEET), + * \param[in] state_sequence method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING), + * \param[in] nb_state_sequence number of state sequences. + * + * \return error status. 
+ */
+/*--------------------------------------------------------------*/
+
+bool HiddenVariableOrderMarkov::state_profile_write(StatError &error , const string path ,
+                                                    const MarkovianSequences &iseq ,
+                                                    int identifier , output_format format ,
+                                                    latent_structure_algorithm state_sequence ,
+                                                    int nb_state_sequence) const
+
+{
+  ofstream out_file(path.c_str());
+
+
+  error.init();
+
+  // the file could not be opened: nothing is computed
+
+  if (!out_file) {
+    error.update(STAT_error[STATR_FILE_NAME]);
+    return false;
+  }
+
+  // delegation to the stream-based writer
+
+  return state_profile_write(error , out_file , iseq , identifier ,
+                             format , state_sequence , nb_state_sequence);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the state and entropy profiles using the forward-backward algorithm,
+ *         of state profiles using the Viterbi forward-backward algorithm,
+ *         computation of the N most probable state sequences using the generalized Viterbi algorithm or
+ *         simulation of state sequences using the forward-backward algorithm for sampling and
+ *         displaying the results.
+ *
+ *  \param[in] error             reference on a StatError object,
+ *  \param[in] os                stream for displaying the state and entropy profiles and the N most probable state sequences,
+ *  \param[in] identifier        sequence identifier,
+ *  \param[in] state_sequence    method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING),
+ *  \param[in] nb_state_sequence number of state sequences.
+ *
+ *  \return                      error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool HiddenVariableOrderMarkov::state_profile_ascii_write(StatError &error , ostream &os , int identifier ,
+                                                          latent_structure_algorithm state_sequence ,
+                                                          int nb_state_sequence) const
+
+{
+  error.init();
+
+  // the profiles are computed on the data attached to the model
+
+  if (!markov_data) {
+    error.update(STAT_error[STATR_NO_DATA]);
+    return false;
+  }
+
+  return state_profile_write(error , os , *markov_data , identifier , ASCII ,
+                             state_sequence , nb_state_sequence);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of state and entropy profiles using the forward-backward algorithm,
+ *         of state profiles using the Viterbi forward-backward algorithm,
+ *         computation of the N most probable state sequences using the generalized Viterbi algorithm or
+ *         simulation of state sequences using the forward-backward algorithm for sampling and
+ *         writing of the results in a file.
+ *
+ *  \param[in] error             reference on a StatError object,
+ *  \param[in] path              file path,
+ *  \param[in] identifier        sequence identifier,
+ *  \param[in] format            file format (ASCII/SPREADSHEET),
+ *  \param[in] state_sequence    method for computing the state sequences (GENERALIZED_VITERBI/FORWARD_BACKWARD_SAMPLING),
+ *  \param[in] nb_state_sequence number of state sequences.
+ *
+ *  \return                      error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool HiddenVariableOrderMarkov::state_profile_write(StatError &error , const string path ,
+                                                    int identifier , output_format format ,
+                                                    latent_structure_algorithm state_sequence ,
+                                                    int nb_state_sequence) const
+
+{
+  bool status = true;
+  ofstream out_file(path.c_str());
+
+
+  error.init();
+
+  // both preconditions are checked so that each failure is reported
+
+  if (!out_file) {
+    status = false;
+    error.update(STAT_error[STATR_FILE_NAME]);
+  }
+
+  if (!markov_data) {
+    status = false;
+    error.update(STAT_error[STATR_NO_DATA]);
+  }
+
+  // delegation to the stream-based writer on the data attached to the model
+
+  return (status ? state_profile_write(error , out_file , *markov_data , identifier ,
+                                       format , state_sequence , nb_state_sequence) : status);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of state and entropy profiles using the forward-backward algorithm,
+ *         of state profiles using the Viterbi forward-backward algorithm and
+ *         plot of the results at the Gnuplot format.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] prefix     file prefix,
+ *  \param[in] iseq       reference on a MarkovianSequences object,
+ *  \param[in] identifier sequence identifier,
+ *  \param[in] title      figure title.
+ *
+ *  \return               error status.
+ */ +/*--------------------------------------------------------------*/ + +bool HiddenVariableOrderMarkov::state_profile_plot_write(StatError &error , const char *prefix , + const MarkovianSequences &iseq , + int identifier , const char *title) const + +{ + bool status = true; + int i , j; + int offset = I_DEFAULT , nb_value , index; + double seq_likelihood , max_marginal_entropy , entropy , state_seq_likelihood; + HiddenVariableOrderMarkov *hmarkov; + VariableOrderMarkovData *seq; + ostringstream data_file_name[2]; + ofstream *out_data_file; + + + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE) && (iseq.type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process == iseq.nb_variable) { + offset = 0; + } + else if ((iseq.type[0] == STATE) && (nb_output_process + 1 == iseq.nb_variable)) { + offset = 1; + } + else { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + if (offset != I_DEFAULT) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i + offset] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i + offset] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << 
STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!(iseq.marginal_distribution[i + offset])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i + offset]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + for (i = 0;i < iseq.nb_sequence;i++) { + if (identifier == iseq.identifier[i]) { + index = i; + break; + } + } + + if (i == iseq.nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + + if (status) { + + // writing of the date files + + data_file_name[0] << prefix << 0 << ".dat"; + out_data_file = new ofstream((data_file_name[0].str()).c_str()); + + if (!out_data_file) { + status = false; + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + else { + if (iseq.type[0] != STATE) { + seq = new VariableOrderMarkovData(iseq); + } + else { + seq = new VariableOrderMarkovData(iseq , SEQUENCE_COPY , (type == EQUILIBRIUM ? 
true : false)); + } + + seq_likelihood = forward_backward(*seq , index , out_data_file , NULL , GNUPLOT , + max_marginal_entropy , entropy); + out_data_file->close(); + delete out_data_file; + + if (seq_likelihood == D_INF) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_INCOMPATIBLE_MODEL]); + } + + else { + data_file_name[1] << prefix << 1 << ".dat"; + out_data_file = new ofstream((data_file_name[1].str()).c_str()); + + hmarkov = new HiddenVariableOrderMarkov(*this , false); + + hmarkov->create_cumul(); + hmarkov->log_computation(); + state_seq_likelihood = hmarkov->viterbi_forward_backward(*seq , index , out_data_file , NULL , + GNUPLOT , seq_likelihood); + out_data_file->close(); + delete out_data_file; + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY] << "\"\n\n"; + + if (seq->index_parameter) { + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:" + << exp(state_seq_likelihood - seq_likelihood) << "] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " : " << j + 2 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with 
linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\"\n\n"; + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:1] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << j + 2 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:" + << max_marginal_entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 2 << " title \"" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 3 << " title \"" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 4 << " title \"" << SEQ_label[SEQL_MARGINAL_ENTROPY] + << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + 
out_file << "\n\n"; + + out_file << "plot [" << seq->index_parameter[index][0] << ":" + << seq->index_parameter[index][seq->length[index] - 1] << "] [0:" << entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 5 << " title \"" + << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << 1 << " : " << nb_state + 6 << " title \"" + << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] << "\" with linespoints" << endl; + + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + else { + if (seq->length[index] - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:" + << exp(state_seq_likelihood - seq_likelihood) << "] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " +// << j + 1 << " title \"" << STAT_label[STATL_STATE] << " " + << 1 << " : " << j + 2 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY] << "\"\n\n"; + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:1] "; + for (j = 0;j < nb_state;j++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << j + 1 << " title \"" << STAT_label[STATL_STATE] << " " + << j << "\" with linespoints"; + if (j < nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" 
<< STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:" << max_marginal_entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 1 << " title \"" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 2 << " title \"" << SEQ_label[SEQL_CONDITIONAL_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 3 << " title \"" << SEQ_label[SEQL_MARGINAL_ENTROPY] + << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + out_file << "plot [0:" << seq->length[index] - 1 << "] [0:" << entropy << "] " + << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 4 << " title \"" << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << nb_state + 5 << " title \"" << SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY] + << "\" with linespoints" << endl; + + if (seq->length[index] - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + delete hmarkov; + } + + delete seq; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state and entropy profiles using the 
forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the results at the Gnuplot format. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] identifier sequence identifier, + * \param[in] title figure title. + * + * \return error status (false in particular when no data is associated with the model). + */ +/*--------------------------------------------------------------*/ + +bool HiddenVariableOrderMarkov::state_profile_plot_write(StatError &error , + const char *prefix , int identifier , + const char *title) const + +{ + bool status; + + + error.init(); + + if (!markov_data) { + status = false; + error.update(STAT_error[STATR_NO_DATA]); + } + else { + status = state_profile_plot_write(error , prefix , *markov_data , identifier , title); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the results. + * + * \param[in] error reference on a StatError object, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] identifier sequence identifier. + * + * \return MultiPlotSet object (NULL in case of error). 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* HiddenVariableOrderMarkov::state_profile_plotable_write(StatError &error , + const MarkovianSequences &iseq , + int identifier) const + +{ + bool status = true; + int i; + int offset = I_DEFAULT , nb_value , index; + double seq_likelihood , max_marginal_entropy , entropy , state_seq_likelihood; + HiddenVariableOrderMarkov *hmarkov; + VariableOrderMarkovData *seq; + ostringstream legend; + MultiPlotSet *plot_set; + + + plot_set = NULL; + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE) && (iseq.type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process == iseq.nb_variable) { + offset = 0; + } + else if ((iseq.type[0] == STATE) && (nb_output_process + 1 == iseq.nb_variable)) { + offset = 1; + } + else { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + if (offset != I_DEFAULT) { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i + offset] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i + offset] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + 
error.update((error_message.str()).c_str()); + } + + if (!(iseq.marginal_distribution[i + offset])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + offset + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i + offset]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + for (i = 0;i < iseq.nb_sequence;i++) { /* look-up of the sequence by identifier: index is only assigned when a match is found */ + if (identifier == iseq.identifier[i]) { + index = i; + break; + } + } + + if (i == iseq.nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]); + } + + if (status) { + plot_set = new MultiPlotSet(4); + + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + if (iseq.type[0] != STATE) { + seq = new VariableOrderMarkovData(iseq); + } + else { + seq = new VariableOrderMarkovData(iseq , SEQUENCE_COPY , (type == EQUILIBRIUM ? 
true : false)); + } + + seq_likelihood = forward_backward(*seq , index , NULL , plot_set , PLOT , + max_marginal_entropy , entropy); + + if (seq_likelihood == D_INF) { + delete plot_set; + plot_set = NULL; + error.update(SEQ_error[SEQR_SEQUENCE_INCOMPATIBLE_MODEL]); + } + + else { + hmarkov = new HiddenVariableOrderMarkov(*this , false); + + hmarkov->create_cumul(); + hmarkov->log_computation(); + state_seq_likelihood = hmarkov->viterbi_forward_backward(*seq , index , NULL , &plot[0] , + PLOT , seq_likelihood); + + // maximum posterior probabilities + + plot[0].title = SEQ_label[SEQL_MAX_POSTERIOR_STATE_PROBABILITY]; + + if (seq->index_parameter) { + plot[0].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[0].xtics = 1; + } + } + + else { + plot[0].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[0].xtics = 1; + } + } + + plot[0].yrange = Range(0. , exp(state_seq_likelihood - seq_likelihood)); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i; + plot[0][i].legend = legend.str(); + + plot[0][i].style = "linespoints"; + } + + // smoothed probabilities + + plot[1].title = SEQ_label[SEQL_POSTERIOR_STATE_PROBABILITY]; + + if (seq->index_parameter) { + plot[1].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[1].xtics = 1; + } + } + + else { + plot[1].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[1].xtics = 1; + } + } + + plot[1].yrange = Range(0. 
, 1.); + + for (i = 0;i < nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i; + plot[1][i].legend = legend.str(); + + plot[1][i].style = "linespoints"; + } + + // conditional entropy profiles + + if (seq->index_parameter) { + plot[2].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[2].xtics = 1; + } + } + + else { + plot[2].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[2].xtics = 1; + } + } + + plot[2].yrange = Range(0. , max_marginal_entropy); + + plot[2][0].legend = SEQ_label[SEQL_CONDITIONAL_ENTROPY]; + plot[2][0].style = "linespoints"; + + plot[2][1].legend = SEQ_label[SEQL_CONDITIONAL_ENTROPY]; + plot[2][1].style = "linespoints"; + + plot[2][2].legend = SEQ_label[SEQL_MARGINAL_ENTROPY]; + plot[2][2].style = "linespoints"; + + // partial entropy profiles + + if (seq->index_parameter) { + plot[3].xrange = Range(seq->index_parameter[index][0] , seq->index_parameter[index][seq->length[index] - 1]); + if (seq->index_parameter[index][seq->length[index] - 1] - seq->index_parameter[index][0] < TIC_THRESHOLD) { + plot[3].xtics = 1; + } + } + + else { + plot[3].xrange = Range(0 , seq->length[index] - 1); + if (seq->length[index] - 1 < TIC_THRESHOLD) { + plot[3].xtics = 1; + } + } + + plot[3].yrange = Range(0. 
,entropy); + + plot[3][0].legend = SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY]; + plot[3][0].style = "linespoints"; + + plot[3][1].legend = SEQ_label[SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY]; + plot[3][1].style = "linespoints"; + + delete hmarkov; + } + + delete seq; + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state and entropy profiles using the forward-backward algorithm, + * of state profiles using the Viterbi forward-backward algorithm and + * plot of the results. + * + * \param[in] error reference on a StatError object, + * \param[in] identifier sequence identifier. + * + * \return MultiPlotSet object (NULL if no data is associated with the model). + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* HiddenVariableOrderMarkov::state_profile_plotable_write(StatError &error , + int identifier) const + +{ + MultiPlotSet *plot_set; + + + error.init(); + + if (!markov_data) { + plot_set = NULL; + error.update(STAT_error[STATR_NO_DATA]); + } + else { + plot_set = state_profile_plotable_write(error , *markov_data , identifier); + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the most probable state sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] characteristic_flag flag on the computation of the characteristic distributions. + * + * \return VariableOrderMarkovData object (NULL in case of error). 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* HiddenVariableOrderMarkov::state_sequence_computation(StatError &error , + const MarkovianSequences &iseq , + bool characteristic_flag) const + +{ + bool status = true; + int i; + int nb_value; + HiddenVariableOrderMarkov *hmarkov; + VariableOrderMarkovData *seq; + + + seq = NULL; + error.init(); + + for (i = 0;i < iseq.nb_variable;i++) { + if ((iseq.type[i] != INT_VALUE) && (iseq.type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (nb_output_process != iseq.nb_variable) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + else { + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (iseq.type[i] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (iseq.min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!(iseq.marginal_distribution[i])) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[i]) { + 
nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < iseq.marginal_distribution[i]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_OUTPUT_PROCESS] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + if (status) { + seq = new VariableOrderMarkovData(iseq , ADD_STATE_VARIABLE , (type == EQUILIBRIUM ? true : false)); + + seq->markov = new VariableOrderMarkov(*this , false); + + hmarkov = new HiddenVariableOrderMarkov(*this , false); + + hmarkov->forward_backward(*seq); + + hmarkov->create_cumul(); + hmarkov->log_computation(); + hmarkov->viterbi(*seq); + + // extraction of the characteristics of the sequences and + // computation of the characteristic distributions of the model + + if (seq->restoration_likelihood == D_INF) { + delete seq; + seq = NULL; + error.update(SEQ_error[SEQR_STATE_SEQUENCE_COMPUTATION_FAILURE]); + } + + else { + seq->likelihood = likelihood_computation(iseq , seq->posterior_probability); + +/* seq->min_value_computation(0); + seq->max_value_computation(0); */ + + seq->min_value[0] = 0; /* range of variable 0 forced to [0 , nb_state - 1]; presumably the state variable added by ADD_STATE_VARIABLE - to be confirmed */ + seq->max_value[0] = nb_state - 1; + seq->build_marginal_frequency_distribution(0); + seq->build_characteristic(0); + + seq->build_transition_count(*hmarkov); + seq->build_observation_frequency_distribution(nb_state); + +/* if ((seq->max_value[0] < nb_state - 1) || (!(seq->characteristics[0]))) { + delete seq; + seq = NULL; + error.update(SEQ_error[SEQR_STATES_NOT_REPRESENTED]); + } + + else if (characteristic_flag) { */ + if (characteristic_flag) { + seq->markov->characteristic_computation(*seq , true); + } + } + + delete hmarkov; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * 
\brief Comparison of hidden variable-order Markov chains for a sample of sequences. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the results of model comparison, + * \param[in] nb_model number of hidden variable-order Markov chains, + * \param[in] ihmarkov pointer on the HiddenVariableOrderMarkov objects, + * \param[in] algorithm type of algorithm (FORWARD/VITERBI), + * \param[in] path file path (no file is written when empty). + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::comparison(StatError &error , ostream *os , int nb_model , + const HiddenVariableOrderMarkov **ihmarkov , + latent_structure_algorithm algorithm , const string path) const + +{ + bool status = true; + int i , j; + int nb_value; + double **likelihood; + HiddenVariableOrderMarkov **hmarkov; + VariableOrderMarkovData *seq; + + + error.init(); + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + for (i = 0;i < nb_model;i++) { + if (ihmarkov[i]->nb_output_process != nb_variable) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT_PROCESS]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < nb_variable;j++) { + if ((ihmarkov[i]->categorical_process[j]) || (ihmarkov[i]->discrete_parametric_process[j])) { + if (type[j] == REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << 
STAT_error[STATR_VARIABLE_TYPE]; + 
error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + if (min_value[j] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[j]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + if (ihmarkov[i]->categorical_process[j]) { + nb_value = ihmarkov[i]->categorical_process[j]->nb_value; + } + else { + nb_value = ihmarkov[i]->discrete_parametric_process[j]->nb_value; + } + + if (nb_value < marginal_distribution[j]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_label[STATL_OUTPUT_PROCESS] << " " << j + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + } + + if (status) { + likelihood = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + likelihood[i] = new double[nb_model]; + } + + if (algorithm == VITERBI) { /* hmarkov and seq are only allocated (and released below) for the VITERBI algorithm */ + hmarkov = new HiddenVariableOrderMarkov*[nb_model]; + for (i = 0;i < nb_model;i++) { + hmarkov[i] = new HiddenVariableOrderMarkov(*(ihmarkov[i]) , false); + hmarkov[i]->create_cumul(); + hmarkov[i]->log_computation(); + } + + seq = new VariableOrderMarkovData(*this); + } + + // for each sequence, computation of the log-likelihood for the observed sequence (FORWARD) or + // of the log-likelihood for the most probable state sequence (VITERBI) for each model + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_model;j++) { + switch (algorithm) { + case FORWARD : + likelihood[i][j] = 
ihmarkov[j]->likelihood_computation(*this , NULL , i); + break; + case 
VITERBI : + likelihood[i][j] = hmarkov[j]->viterbi(*seq , NULL , i); + break; + } + } + } + + if (os) { + likelihood_write(*os , nb_model , likelihood , SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] , + true , algorithm); + } + if (!path.empty()) { + status = likelihood_write(error , path , nb_model , likelihood , + SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] , algorithm); + } + + for (i = 0;i < nb_sequence;i++) { + delete [] likelihood[i]; + } + delete [] likelihood; + + if (algorithm == VITERBI) { + for (i = 0;i < nb_model;i++) { + delete hmarkov[i]; + } + delete [] hmarkov; + + delete seq; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a hidden variable-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] length_distribution sequence length frequency distribution, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] divergence_flag flag on the computation of the Kullback-Leibler divergence. + * + * \return VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* HiddenVariableOrderMarkov::simulation(StatError &error , + const FrequencyDistribution &length_distribution , + bool counting_flag , + bool divergence_flag) const + +{ + int i; + MarkovianSequences *observed_seq; + VariableOrderMarkovData *seq; + + + seq = VariableOrderMarkov::simulation(error , length_distribution , counting_flag , divergence_flag); + + if ((seq) && (!divergence_flag)) { /* the likelihood computations below are skipped when divergence_flag is set */ + seq->posterior_probability = new double[seq->nb_sequence]; + for (i = 0;i < seq->nb_sequence;i++) { + seq->posterior_probability[i] = VariableOrderMarkov::likelihood_computation(*seq , i); + } + +/* for (i = 0;i < nb_output_process;i++) { + if (continuous_parametric_process[i]) { + seq->restoration_likelihood = VariableOrderMarkov::likelihood_computation(*seq , I_DEFAULT); + break; + } + } */ + + observed_seq = seq->remove_variable_1(); /* temporary object, released right after the likelihood computation */ + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a hidden variable-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of generated sequences, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* HiddenVariableOrderMarkov::simulation(StatError &error , + int nb_sequence , int length , + bool counting_flag) const + +{ + int i; + MarkovianSequences *observed_seq; + VariableOrderMarkovData *seq; + + + seq = VariableOrderMarkov::simulation(error , nb_sequence , length , counting_flag); + + if (seq) { /* seq is NULL when the underlying simulation failed: it is then returned as is */ + seq->posterior_probability = new double[seq->nb_sequence]; + for (i = 0;i < seq->nb_sequence;i++) { + seq->posterior_probability[i] = VariableOrderMarkov::likelihood_computation(*seq , i); + } + +/* for (i = 0;i < nb_output_process;i++) { + if (continuous_parametric_process[i]) { + seq->restoration_likelihood = VariableOrderMarkov::likelihood_computation(*seq , I_DEFAULT); + break; + } + } */ + + observed_seq = seq->remove_variable_1(); /* temporary object, released right after the likelihood computation */ + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a hidden variable-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of generated sequences, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* HiddenVariableOrderMarkov::simulation(StatError &error , + int nb_sequence , + const MarkovianSequences &iseq , + bool counting_flag) const + +{ + int i; + MarkovianSequences *observed_seq; + VariableOrderMarkovData *seq; + + + seq = VariableOrderMarkov::simulation(error , nb_sequence , iseq , counting_flag); + + if (seq) { /* seq is NULL when the underlying simulation failed: it is then returned as is */ + seq->posterior_probability = new double[seq->nb_sequence]; + for (i = 0;i < seq->nb_sequence;i++) { + seq->posterior_probability[i] = VariableOrderMarkov::likelihood_computation(*seq , i); + } + +/* for (i = 0;i < nb_output_process;i++) { + if (continuous_parametric_process[i]) { + seq->restoration_likelihood = VariableOrderMarkov::likelihood_computation(*seq , I_DEFAULT); + break; + } + } */ + + observed_seq = seq->remove_variable_1(); /* temporary object, released right after the likelihood computation */ + seq->likelihood = likelihood_computation(*observed_seq , seq->posterior_probability); + delete observed_seq; + + forward_backward(*seq); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between hidden variable-order Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of hidden variable-order Markov chains (including this object), + * \param[in] ihmarkov pointer on the nb_model - 1 other HiddenVariableOrderMarkov objects (this object being the first model), + * \param[in] length_distribution sequence length frequency distributions (one per model), + * \param[in] path file path (no file is written when empty). + * + * \return DistanceMatrix object (NULL in case of error). 
+ */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* HiddenVariableOrderMarkov::divergence_computation(StatError &error , ostream *os , int nb_model , + const HiddenVariableOrderMarkov **ihmarkov , + FrequencyDistribution **length_distribution , + const string path) const + +{ + bool status = true , lstatus; + int i , j , k; + int cumul_length , nb_failure; + double **likelihood; + long double divergence; + const HiddenVariableOrderMarkov **hmarkov; + MarkovianSequences *seq; + VariableOrderMarkovData *simul_seq; + DistanceMatrix *dist_matrix; + ofstream *out_file; + + + dist_matrix = NULL; + error.init(); + + for (i = 0;i < nb_model - 1;i++) { + if (ihmarkov[i]->type != type) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_MODEL_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (ihmarkov[i]->nb_output_process != nb_output_process) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_error[STATR_NB_OUTPUT_PROCESS]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < nb_output_process;j++) { + if ((categorical_process[j]) && (ihmarkov[i]->categorical_process[j]) && + (ihmarkov[i]->categorical_process[j]->nb_value != categorical_process[j]->nb_value)) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_label[STATL_OUTPUT_PROCESS] << " " << j << " " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + + if (((continuous_parametric_process[j]) && (!(ihmarkov[i]->continuous_parametric_process[j]))) || + ((!continuous_parametric_process[j]) && (ihmarkov[i]->continuous_parametric_process[j]))) { + status = false; + ostringstream error_message; + error_message << 
SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_label[STATL_OUTPUT_PROCESS] << " " << j << " " + << SEQ_error[SEQR_OUTPUT_PROCESS_TYPE]; + error.update((error_message.str()).c_str()); + } + } + } + } + + for (i = 0;i < nb_model;i++) { + lstatus = true; + + if ((length_distribution[i]->nb_element < 1) || (length_distribution[i]->nb_element > NB_SEQUENCE)) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_NB_SEQUENCE]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->offset < 2) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->nb_value - 1 > MAX_LENGTH) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + + if (!lstatus) { + status = false; + } + + else { + cumul_length = 0; + for (j = length_distribution[i]->offset;j < length_distribution[i]->nb_value;j++) { + cumul_length += j * length_distribution[i]->frequency[j]; + } + + if (cumul_length > CUMUL_LENGTH) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + out_file = NULL; + + if (!path.empty()) { + out_file = new ofstream(path.c_str()); + + if (!out_file) { + error.update(STAT_error[STATR_FILE_NAME]); + if 
(os) { + *os << error; + } + } + } + + hmarkov = new const HiddenVariableOrderMarkov*[nb_model]; + + hmarkov[0] = this; + for (i = 1;i < nb_model;i++) { + hmarkov[i] = ihmarkov[i - 1]; + } + + dist_matrix = new DistanceMatrix(nb_model , SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN]); + + for (i = 0;i < nb_model;i++) { + + // generation of a sample of sequences using a hidden variable-order Markov chain + + simul_seq = hmarkov[i]->simulation(error , *length_distribution[i] , false , true); + seq = simul_seq->remove_variable_1(); + + likelihood = new double*[seq->nb_sequence]; + for (j = 0;j < seq->nb_sequence;j++) { + likelihood[j] = new double[nb_model]; + } + + for (j = 0;j < seq->nb_sequence;j++) { + likelihood[j][i] = hmarkov[i]->likelihood_computation(*seq , NULL , j); + + if ((os) && (likelihood[j][i] == D_INF)) { + *os << "\nERROR - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << endl; + } + } + + // computation of the log-likelihood of each hidden variable-order Markov chain for the sample of sequences + + for (j = 0;j < nb_model;j++) { + if (j != i) { + divergence = 0.; + cumul_length = 0; + nb_failure = 0; + + for (k = 0;k < seq->nb_sequence;k++) { + likelihood[k][j] = hmarkov[j]->likelihood_computation(*seq , NULL , k); + +// if (divergence != -D_INF) { + if (likelihood[k][j] != D_INF) { + divergence += likelihood[k][i] - likelihood[k][j]; + cumul_length += seq->length[k]; + } + else { + nb_failure++; +// divergence = -D_INF; + } +// } + } + + if ((os) && (nb_failure > 0)) { + *os << "\nWARNING - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << ", " + << SEQ_error[SEQR_TARGET_MODEL] << ": " << j + 1 << " - " + << SEQ_error[SEQR_DIVERGENCE_NB_FAILURE] << ": " << nb_failure << endl; + } + +// if (divergence != -D_INF) { + dist_matrix->update(i + 1 , j + 1 , divergence , cumul_length); +// } + } + } + + if (os) { + *os << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 1 << ": " << seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << 
SEQ_label[seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + seq->likelihood_write(cout , nb_model , likelihood , SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN]); + } + if (out_file) { + *out_file << SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN] << " " << i + 1 << ": " << seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + seq->likelihood_write(*out_file , nb_model , likelihood , SEQ_label[SEQL_HIDDEN_MARKOV_CHAIN]); + } + + for (j = 0;j < seq->nb_sequence;j++) { + delete [] likelihood[j]; + } + delete [] likelihood; + + delete seq; + delete simul_seq; + } + + if (out_file) { + out_file->close(); + delete out_file; + } + + delete hmarkov; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between hidden variable-order Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of hidden variable-order Markov chains, + * \param[in] hmarkov pointer on the HiddenVariableOrderMarkov objects, + * \param[in] nb_sequence number of generated sequences, + * \param[in] length sequence length, + * \param[in] path file path. + * + * \return DistanceMatrix object. 
+ */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* HiddenVariableOrderMarkov::divergence_computation(StatError &error , ostream *os , + int nb_model , const HiddenVariableOrderMarkov **hmarkov , + int nb_sequence , int length , const string path) const + +{ + bool status = true; + int i; + FrequencyDistribution **length_distribution; + DistanceMatrix *dist_matrix; + + + dist_matrix = NULL; + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + if (status) { + length_distribution = new FrequencyDistribution*[nb_model]; + + length_distribution[0] = new FrequencyDistribution(length + 1); /* degenerate distribution: the nb_sequence sequences all have the same length */ + + length_distribution[0]->nb_element = nb_sequence; + length_distribution[0]->offset = length; + length_distribution[0]->max = nb_sequence; + length_distribution[0]->mean = length; + length_distribution[0]->variance = 0.; + length_distribution[0]->frequency[length] = nb_sequence; + + for (i = 1;i < nb_model;i++) { /* the same distribution is copied for every other model */ + length_distribution[i] = new FrequencyDistribution(*length_distribution[0]); + } + + dist_matrix = divergence_computation(error , os , nb_model , hmarkov , length_distribution , path); + + for (i = 0;i < nb_model;i++) { + delete length_distribution[i]; + } + delete [] length_distribution; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between hidden variable-order Markov chains. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of hidden variable-order Markov chains, + * \param[in] hmarkov pointer on the HiddenVariableOrderMarkov objects, + * \param[in] nb_sequence number of generated sequences, + * \param[in] seq pointer on MarkovianSequences objects, + * \param[in] path file path. + * + * \return DistanceMatrix object. + */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* HiddenVariableOrderMarkov::divergence_computation(StatError &error , ostream *os , + int nb_model , const HiddenVariableOrderMarkov **hmarkov , + int nb_sequence , const MarkovianSequences **seq , + const string path) const + +{ + int i; + FrequencyDistribution **length_distribution; + DistanceMatrix *dist_matrix; + + + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + dist_matrix = NULL; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + length_distribution = new FrequencyDistribution*[nb_model]; + for (i = 0;i < nb_model;i++) { + length_distribution[i] = seq[i]->length_distribution->frequency_scale(nb_sequence); + } + + dist_matrix = divergence_computation(error , os , nb_model , hmarkov , length_distribution , path); + + for (i = 0;i < nb_model;i++) { + delete length_distribution[i]; + } + delete [] length_distribution; + } + + return dist_matrix; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/markovian_sequences1.cpp b/src/cpp/sequence_analysis/markovian_sequences1.cpp new file mode 100644 index 0000000..99ea6b3 --- /dev/null +++ b/src/cpp/sequence_analysis/markovian_sequences1.cpp @@ -0,0 +1,5567 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File 
author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include +#include + +#include + +#include "stat_tool/stat_label.h" + +// #include "stat_tool/quantile_computation.hpp" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the MarkovianSequences class. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences::MarkovianSequences() + +{ + min_interval = NULL; + + self_transition = NULL; + observation_distribution = NULL; + observation_histogram = NULL; + characteristics = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Default initialization of the data members of the MarkovianSequences class. 
+ *
+ * Allocates min_interval (all entries set to 0.) and characteristics
+ * (all entries set to NULL) for nb_variable variables and sets the other
+ * pointer data members to NULL. Data members previously allocated are not
+ * freed by this function.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::init()
+
+{
+  int i;
+
+
+  min_interval = new double[nb_variable];
+  for (i = 0;i < nb_variable;i++) {
+    min_interval[i] = 0.;
+  }
+
+  self_transition = NULL;
+  observation_distribution = NULL;
+  observation_histogram = NULL;
+
+  characteristics = new SequenceCharacteristics*[nb_variable];
+  for (i = 0;i < nb_variable;i++) {
+    characteristics[i] = NULL;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a MarkovianSequences object from a Sequences object.
+ *
+ * \param[in] seq reference on a Sequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences::MarkovianSequences(const Sequences &seq)
+:Sequences(seq)
+
+{
+  int i;
+
+
+  // init() already allocates min_interval (and characteristics):
+  // allocating min_interval a second time here leaked the first array
+
+  init();
+
+  for (i = 0;i < nb_variable;i++) {
+    min_interval_computation(i);
+  }
+
+  build_characteristic();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a MarkovianSequences object adding auxiliary variables.
+ *
+ * \param[in] seq reference on a MarkovianSequences object,
+ * \param[in] auxiliary flags on the addition of auxiliary variables.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences::MarkovianSequences(const MarkovianSequences &seq , bool *auxiliary)
+:Sequences(seq , auxiliary)
+
+{
+  int source , target;
+
+
+  self_transition = NULL;
+  observation_distribution = NULL;
+  observation_histogram = NULL;
+
+  min_interval = new double[nb_variable];
+  characteristics = new SequenceCharacteristics*[nb_variable];
+
+  // "source" indexes the variables of seq, "target" the variables of the object
+  // under construction: each flagged variable is followed by a new variable
+  // with neither minimum interval nor characteristics
+
+  target = 0;
+  for (source = 0;source < seq.nb_variable;source++) {
+    min_interval[target] = seq.min_interval[source];
+    characteristics[target] = (seq.characteristics[source] ?
+                               new SequenceCharacteristics(*(seq.characteristics[source])) : NULL);
+    target++;
+
+    if (auxiliary[source]) {
+      min_interval[target] = 0.;
+      characteristics[target] = NULL;
+      target++;
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a MarkovianSequences exactly as a Sequences object
+ *
+ * \param[in] inb_sequence number of sequences,
+ * \param[in] iidentifier sequence identifiers,
+ * \param[in] ilength sequence lengths,
+ * \param[in] ivertex_identifier vertex identifiers of the associated MTG,
+ * \param[in] iindex_param_type index parameter type,
+ * \param[in] inb_variable number of variables,
+ * \param[in] itype variable types,
+ * \param[in] vertex_identifier_copy flag copy of vertex identifiers,
+ * \param[in] init_flag flag initialization.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences::MarkovianSequences(int inb_sequence , int *iidentifier , int *ilength ,
+                                       int **ivertex_identifier , index_parameter_type iindex_param_type , int inb_variable ,
+                                       stat_tool::variable_nature *itype , bool vertex_identifier_copy, bool init_flag)
+
+// the base class is listed first since it is always constructed before the data members
+: Sequences(inb_sequence , iidentifier , ilength , ivertex_identifier ,
+            iindex_param_type , inb_variable , itype ,
+            vertex_identifier_copy , init_flag),
+  min_interval(NULL),
+  self_transition(NULL),
+  observation_distribution(NULL),
+  observation_histogram(NULL),
+  characteristics(NULL)
+{
+  // allocation of min_interval and characteristics for the nb_variable variables
+  init();
+}
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a MarkovianSequences exactly as a Sequences object
+ *
+ * \param[in] ilength_distribution sequence length frequency distribution,
+ * \param[in] inb_variable number of variables,
+ * \param[in] itype variable types,
+ * \param[in] init_flag flag initialization.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences::MarkovianSequences(const stat_tool::FrequencyDistribution &ilength_distribution , int inb_variable ,
+                                       stat_tool::variable_nature *itype , bool init_flag)
+// the base class is listed first since it is always constructed before the data members
+: Sequences(ilength_distribution , inb_variable , itype , init_flag),
+  min_interval(NULL),
+  self_transition(NULL),
+  observation_distribution(NULL),
+  observation_histogram(NULL),
+  characteristics(NULL)
+{
+  // allocation of min_interval and characteristics for the nb_variable variables
+  init();
+}
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a MarkovianSequences exactly as a Sequences object
+ *
+ * \param[in] seq reference on a Sequences object,
+ * \param[in] variable variable index,
+ * \param[in] itype selected variable type.
+ */
+/*--------------------------------------------------------------*/
+MarkovianSequences::MarkovianSequences(const MarkovianSequences &seq , int variable , stat_tool::variable_nature itype)
+// the base class is listed first since it is always constructed before the data members
+: Sequences(seq , variable , itype),
+  min_interval(NULL),
+  self_transition(NULL),
+  observation_distribution(NULL),
+  observation_histogram(NULL),
+  characteristics(NULL)
+{
+  // allocation of min_interval and characteristics for the nb_variable variables
+  init();
+}
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Copy of a MarkovianSequences object.
+ *
+ * Only the data members specific to MarkovianSequences are copied.
+ * The data members of the object are overwritten without being freed.
+ *
+ * \param[in] seq reference on a MarkovianSequences object,
+ * \param[in] param addition/removing of the initial run length frequency distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::copy(const MarkovianSequences &seq , initial_run param)
+
+{
+  // change_initial_run: the initial run status is imposed by param,
+  // initial_run_flag: imposed status (meaningful only if change_initial_run)
+  const bool change_initial_run = ((param == ADD_INITIAL_RUN) || (param == REMOVE_INITIAL_RUN));
+  const bool initial_run_flag = (param == ADD_INITIAL_RUN);
+  int var , state;
+
+
+  min_interval = new double[nb_variable];
+  for (var = 0;var < nb_variable;var++) {
+    min_interval[var] = seq.min_interval[var];
+  }
+
+  // copy of the self-transition probability functions (indexed by the values of the 1st variable)
+
+  self_transition = NULL;
+
+  if (seq.self_transition) {
+    const int nb_state = marginal_distribution[0]->nb_value;
+
+    self_transition = new SelfTransition*[nb_state];
+    for (state = 0;state < nb_state;state++) {
+      self_transition[state] = (seq.self_transition[state] ?
+                                new SelfTransition(*(seq.self_transition[state])) : NULL);
+    }
+  }
+
+  // copy of the observation frequency distributions (variables 1..nb_variable-1)
+
+  observation_distribution = NULL;
+
+  if (seq.observation_distribution) {
+    observation_distribution = new FrequencyDistribution**[nb_variable];
+    observation_distribution[0] = NULL;
+
+    for (var = 1;var < nb_variable;var++) {
+      if (!seq.observation_distribution[var]) {
+        observation_distribution[var] = NULL;
+        continue;
+      }
+
+      const int nb_state = marginal_distribution[0]->nb_value;
+
+      observation_distribution[var] = new FrequencyDistribution*[nb_state];
+      for (state = 0;state < nb_state;state++) {
+        observation_distribution[var][state] = new FrequencyDistribution(*(seq.observation_distribution[var][state]));
+      }
+    }
+  }
+
+  // copy of the observation histograms (variables 1..nb_variable-1)
+
+  observation_histogram = NULL;
+
+  if (seq.observation_histogram) {
+    observation_histogram = new Histogram**[nb_variable];
+    observation_histogram[0] = NULL;
+
+    for (var = 1;var < nb_variable;var++) {
+      if (!seq.observation_histogram[var]) {
+        observation_histogram[var] = NULL;
+        continue;
+      }
+
+      const int nb_state = marginal_distribution[0]->nb_value;
+
+      observation_histogram[var] = new Histogram*[nb_state];
+      for (state = 0;state < nb_state;state++) {
+        observation_histogram[var][state] = new Histogram(*(seq.observation_histogram[var][state]));
+      }
+    }
+  }
+
+  // copy of the characteristics
+
+  characteristics = new SequenceCharacteristics*[nb_variable];
+
+  for (var = 0;var < nb_variable;var++) {
+    if (!seq.characteristics[var]) {
+      characteristics[var] = NULL;
+      continue;
+    }
+
+    if (!change_initial_run) {
+      characteristics[var] = new SequenceCharacteristics(*(seq.characteristics[var]));
+      continue;
+    }
+
+    characteristics[var] = new SequenceCharacteristics(*(seq.characteristics[var]) , initial_run_flag);
+
+    // the sojourn time frequency distributions are rebuilt only if
+    // the initial run status actually changes
+
+    if ((seq.characteristics[var]->initial_run != NULL) != initial_run_flag) {
+      build_sojourn_time_frequency_distribution(var , initial_run_flag);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Copy of a MarkovianSequences object reversing the direction of sequences.
+ *
+ * \param[in] seq reference on a MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::reverse(const MarkovianSequences &seq)
+
+{
+  int var , state;
+
+
+  min_interval = new double[nb_variable];
+  for (var = 0;var < nb_variable;var++) {
+    min_interval[var] = seq.min_interval[var];
+  }
+
+  // the self-transition probability functions are not copied
+
+  self_transition = NULL;
+
+  // copy of the observation frequency distributions (variables 1..nb_variable-1)
+
+  observation_distribution = NULL;
+
+  if (seq.observation_distribution) {
+    observation_distribution = new FrequencyDistribution**[nb_variable];
+    observation_distribution[0] = NULL;
+
+    for (var = 1;var < nb_variable;var++) {
+      if (!seq.observation_distribution[var]) {
+        observation_distribution[var] = NULL;
+        continue;
+      }
+
+      const int nb_state = marginal_distribution[0]->nb_value;
+
+      observation_distribution[var] = new FrequencyDistribution*[nb_state];
+      for (state = 0;state < nb_state;state++) {
+        observation_distribution[var][state] = new FrequencyDistribution(*(seq.observation_distribution[var][state]));
+      }
+    }
+  }
+
+  // copy of the observation histograms (variables 1..nb_variable-1)
+
+  observation_histogram = NULL;
+
+  if (seq.observation_histogram) {
+    observation_histogram = new Histogram**[nb_variable];
+    observation_histogram[0] = NULL;
+
+    for (var = 1;var < nb_variable;var++) {
+      if (!seq.observation_histogram[var]) {
+        observation_histogram[var] = NULL;
+        continue;
+      }
+
+      const int nb_state = marginal_distribution[0]->nb_value;
+
+      observation_histogram[var] = new Histogram*[nb_state];
+      for (state = 0;state < nb_state;state++) {
+        observation_histogram[var][state] = new Histogram(*(seq.observation_histogram[var][state]));
+      }
+    }
+  }
+
+  // copy of the characteristics with the REVERSE transform, then rebuilding of
+  // the index value functions and first occurrence frequency distributions
+
+  characteristics = new SequenceCharacteristics*[nb_variable];
+
+  for (var = 0;var < nb_variable;var++) {
+    if (!seq.characteristics[var]) {
+      characteristics[var] = NULL;
+      continue;
+    }
+
+    characteristics[var] = new SequenceCharacteristics(*(seq.characteristics[var]) , REVERSE);
+
+    build_index_value(var);
+    build_first_occurrence_frequency_distribution(var);
+
+    // the sojourn time frequency distributions are rebuilt only if
+    // seq has no initial run frequency distributions
+
+    if (!(seq.characteristics[var]->initial_run)) {
+      build_sojourn_time_frequency_distribution(var);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *
\brief Copy of a MarkovianSequences object adding a state variable. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] param addition/removing of the initial run length frequency distributions. + */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::add_state_variable(const MarkovianSequences &seq , initial_run param) + +{ + bool initial_run_flag; + int i; + + + min_interval = new double[nb_variable]; + min_interval[0] = 0.; + for (i = 0;i < seq.nb_variable;i++) { + min_interval[i + 1] = seq.min_interval[i]; + } + + self_transition = NULL; + observation_distribution = NULL; + observation_histogram = NULL; + + characteristics = new SequenceCharacteristics*[nb_variable]; + characteristics[0] = NULL; + + for (i = 0;i < seq.nb_variable;i++) { + if (seq.characteristics[i]) { + if ((param == ADD_INITIAL_RUN) || (param == REMOVE_INITIAL_RUN)) { + switch (param) { + case ADD_INITIAL_RUN : + initial_run_flag = true; + break; + case REMOVE_INITIAL_RUN : + initial_run_flag = false; + break; + } + + characteristics[i + 1] = new SequenceCharacteristics(*(seq.characteristics[i]) , initial_run_flag); + + if (((seq.characteristics[i]->initial_run) && (!initial_run_flag)) || + ((!(seq.characteristics[i]->initial_run)) && (initial_run_flag))) { + build_sojourn_time_frequency_distribution(i + 1 , initial_run_flag); + } + } + + else { + characteristics[i + 1] = new SequenceCharacteristics(*(seq.characteristics[i])); + } + } + + else { + characteristics[i + 1] = NULL; + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor by copy of the MarkovianSequences class. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] transform type of transform, + * \param[in] param addition/removing of the initial run length frequency distributions. 
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences::MarkovianSequences(const MarkovianSequences &seq , sequence_transformation transform ,
+                                       initial_run param)
+
+{
+  // 1st step: data members inherited from Sequences
+
+  switch (transform) {
+  case REVERSE :
+    Sequences::reverse(seq);
+    break;
+  case ADD_STATE_VARIABLE :
+    Sequences::add_state_variable(seq);
+    break;
+  case EXPLICIT_INDEX_PARAMETER :
+    Sequences::explicit_index_parameter(seq);
+    break;
+  case REMOVE_INDEX_PARAMETER :
+    Sequences::remove_index_parameter(seq);
+    break;
+  default :   // SEQUENCE_COPY and any other transform
+    Sequences::copy(seq);
+    break;
+  }
+
+  // 2nd step: data members specific to MarkovianSequences
+  // (param is taken into account only for SEQUENCE_COPY and ADD_STATE_VARIABLE)
+
+  switch (transform) {
+  case SEQUENCE_COPY :
+    copy(seq , param);
+    break;
+  case REVERSE :
+    reverse(seq);
+    break;
+  case ADD_STATE_VARIABLE :
+    add_state_variable(seq , param);
+    break;
+  default :
+    copy(seq);
+    break;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destructor of data members of the MarkovianSequences class.
+ *
+ * The pointers are not reset to NULL after deallocation.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::remove()
+
+{
+  int var , state;
+
+
+  delete [] min_interval;
+
+  if (self_transition) {
+    for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+      delete self_transition[state];
+    }
+    delete [] self_transition;
+  }
+
+  // deleting a null pointer has no effect: the individual
+  // frequency distributions and histograms are not tested
+
+  if (observation_distribution) {
+    for (var = 1;var < nb_variable;var++) {
+      if (!observation_distribution[var]) {
+        continue;
+      }
+
+      for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+        delete observation_distribution[var][state];
+      }
+      delete [] observation_distribution[var];
+    }
+    delete [] observation_distribution;
+  }
+
+  if (observation_histogram) {
+    for (var = 1;var < nb_variable;var++) {
+      if (!observation_histogram[var]) {
+        continue;
+      }
+
+      for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+        delete observation_histogram[var][state];
+      }
+      delete [] observation_histogram[var];
+    }
+    delete [] observation_histogram;
+  }
+
+  if (characteristics) {
+    for (var = 0;var < nb_variable;var++) {
+      delete characteristics[var];
+    }
+    delete [] characteristics;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destructor of the MarkovianSequences class.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences::~MarkovianSequences()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Assignement operator of the MarkovianSequences class.
+ *
+ * \param[in] seq reference on a MarkovianSequences object.
+ *
+ * \return MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences& MarkovianSequences::operator=(const MarkovianSequences &seq)
+
+{
+  // self-assignment: nothing to do
+
+  if (&seq == this) {
+    return *this;
+  }
+
+  // the MarkovianSequences data members are freed first since remove()
+  // reads marginal_distribution and nb_variable
+
+  remove();
+  Sequences::remove();
+
+  Sequences::copy(seq);
+  copy(seq);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Initialization of a state variable (1st variable).
+ *
+ * \param[in] itype 1st variable type (STATE/INT_VALUE/REAL_VALUE).
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::state_variable_init(variable_nature itype)
+
+{
+  int var , state;
+
+
+  // nothing to do if the type of the 1st variable is unchanged
+
+  if (itype == type[0]) {
+    return;
+  }
+
+  // the 1st variable is no longer a state variable: the self-transition
+  // probability functions, observation frequency distributions and
+  // observation histograms are deleted
+
+  if (type[0] == STATE) {
+    if (self_transition) {
+      for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+        delete self_transition[state];
+      }
+      delete [] self_transition;
+
+      self_transition = NULL;
+    }
+
+    if (observation_distribution) {
+      for (var = 1;var < nb_variable;var++) {
+        if (!observation_distribution[var]) {
+          continue;
+        }
+
+        for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+          delete observation_distribution[var][state];
+        }
+        delete [] observation_distribution[var];
+      }
+      delete [] observation_distribution;
+
+      observation_distribution = NULL;
+    }
+
+    if (observation_histogram) {
+      for (var = 1;var < nb_variable;var++) {
+        if (!observation_histogram[var]) {
+          continue;
+        }
+
+        for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+          delete observation_histogram[var][state];
+        }
+        delete [] observation_histogram[var];
+      }
+      delete [] observation_histogram;
+
+      observation_histogram = NULL;
+    }
+  }
+
+  type[0] = itype;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Extraction of a frequency distribution.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] type frequency distribution type,
+ * \param[in] variable variable index,
+ * \param[in] value value.
+ *
+ * \return DiscreteDistributionData object.
+ */
+/*--------------------------------------------------------------*/
+
+DiscreteDistributionData* MarkovianSequences::extract(StatError &error , process_distribution type ,
+                                                      int variable , int value) const
+
+{
+  bool status = true;
+  FrequencyDistribution *phisto;
+  DiscreteDistributionData *histo;
+
+
+  // phisto is initialized since the switch below does not cover
+  // all the values of process_distribution
+
+  phisto = NULL;
+  histo = NULL;
+  error.init();
+
+  // variable is 1-based for the caller
+
+  if ((variable < 1) || (variable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    variable--;
+
+    if (!characteristics[variable]) {
+      status = false;
+      error.update(SEQ_error[SEQR_CHARACTERISTICS_NOT_COMPUTED]);
+    }
+
+    else if ((value < 0) || (value >= marginal_distribution[variable]->nb_value)) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_VALUE] << " " << value << " "
+                    << STAT_error[STATR_NOT_PRESENT];
+      error.update((error_message.str()).c_str());
+    }
+
+    if (status) {
+      switch (type) {
+
+      case FIRST_OCCURRENCE : {
+        phisto = characteristics[variable]->first_occurrence[value];
+        break;
+      }
+
+      case RECURRENCE_TIME : {
+        phisto = characteristics[variable]->recurrence_time[value];
+        break;
+      }
+
+      case SOJOURN_TIME : {
+        phisto = characteristics[variable]->sojourn_time[value];
+        break;
+      }
+
+      case INITIAL_RUN : {
+        if (characteristics[variable]->initial_run) {
+          phisto = characteristics[variable]->initial_run[value];
+        }
+        else {
+          phisto = NULL;
+          status = false;
+          error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+        }
+        break;
+      }
+
+      case FINAL_RUN : {
+        phisto = characteristics[variable]->final_run[value];
+        break;
+      }
+
+      case NB_RUN : {
+        if (characteristics[variable]->nb_run) {
+          phisto = characteristics[variable]->nb_run[value];
+        }
+        else {
+          phisto = NULL;
+          status = false;
+          error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+        }
+        break;
+      }
+
+      case NB_OCCURRENCE : {
+        if (characteristics[variable]->nb_occurrence) {
+          phisto = characteristics[variable]->nb_occurrence[value];
+        }
+        else {
+          phisto = NULL;
+          status = false;
+          error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+        }
+        break;
+      }
+
+      // frequency distribution type not handled by this function:
+      // previously phisto was left uninitialized and then dereferenced
+
+      default : {
+        status = false;
+        error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+        break;
+      }
+      }
+
+      if (status) {
+
+        // a null frequency distribution is reported instead of being dereferenced
+
+        if (!phisto) {
+          status = false;
+          error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+        }
+        else if (phisto->nb_element == 0) {
+          status = false;
+          error.update(STAT_error[STATR_EMPTY_SAMPLE]);
+        }
+      }
+    }
+  }
+
+  if (status) {
+    histo = new DiscreteDistributionData(*phisto);
+  }
+
+  return histo;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Merging of MarkovianSequences objects.
+ *
+ * The object itself is the 1st merged sample, followed by the nb_sample
+ * objects of iseq (hence the sample numbers i + 2 in the error messages).
+ * All the samples should have the same index parameter type, the same number
+ * of variables and the same variable types.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] nb_sample number of MarkovianSequences objects,
+ * \param[in] iseq pointer on the MarkovianSequences objects.
+ *
+ * \return MarkovianSequences object (NULL if the samples are not consistent).
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::merge(StatError &error , int nb_sample ,
+                                              const MarkovianSequences **iseq) const
+
+{
+  bool status = true;
+  int i , j , k , m , n , p , q;
+  int inb_sequence , cumul_nb_sequence , nb_histo , *ilength , *iidentifier ,
+      **ivertex_identifier;
+  const FrequencyDistribution **phisto;
+  MarkovianSequences *seq;
+  const MarkovianSequences **pseq;
+
+
+  seq = NULL;
+  error.init();
+
+  // checking of the index parameter types
+
+  for (i = 0;i < nb_sample;i++) {
+    if (iseq[i]->index_param_type != index_param_type) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                    << SEQ_error[SEQR_INDEX_PARAMETER_TYPE];
+
+      if (index_param_type == IMPLICIT_TYPE) {
+        error.update((error_message.str()).c_str());
+      }
+      else {
+        error.correction_update((error_message.str()).c_str() , SEQ_index_parameter_word[index_param_type]);
+      }
+    }
+  }
+
+  // checking of the number of variables and of the variable types
+
+  for (i = 0;i < nb_sample;i++) {
+    if (iseq[i]->nb_variable != nb_variable) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                    << STAT_error[STATR_NB_VARIABLE];
+      error.correction_update((error_message.str()).c_str() , nb_variable);
+    }
+
+    else {
+      for (j = 0;j < nb_variable;j++) {
+        if (iseq[i]->type[j] != type[j]) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                        << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": "
+                        << STAT_error[STATR_VARIABLE_TYPE];
+          error.correction_update((error_message.str()).c_str() , STAT_variable_word[type[j]]);
+        }
+      }
+    }
+  }
+
+  if (status) {
+
+    // from here, nb_sample includes the object itself (pseq[0])
+
+    nb_sample++;
+    pseq = new const MarkovianSequences*[nb_sample];
+
+    pseq[0] = this;
+    for (i = 1;i < nb_sample;i++) {
+      pseq[i] = iseq[i - 1];
+    }
+
+    // computation of the number of sequences
+
+    inb_sequence = 0;
+    for (i = 0;i < nb_sample;i++) {
+      inb_sequence += pseq[i]->nb_sequence;
+    }
+
+    // comparison of sequence identifiers: an identifier is compared with the
+    // identifiers of the preceding samples; if a duplicate is found,
+    // the identifiers are discarded (iidentifier set to NULL)
+
+    iidentifier = new int[inb_sequence];
+
+    cumul_nb_sequence = 0;
+    i = 0;
+    for (j = 0;j < nb_sample;j++) {
+      for (k = 0;k < pseq[j]->nb_sequence;k++) {
+        iidentifier[i] = pseq[j]->identifier[k];
+
+        for (m = 0;m < cumul_nb_sequence;m++) {
+          if (iidentifier[i] == iidentifier[m]) {
+            delete [] iidentifier;
+            iidentifier = NULL;
+            break;
+          }
+        }
+
+        if (!iidentifier) {
+          break;
+        }
+        i++;
+      }
+
+      if (!iidentifier) {
+        break;
+      }
+      cumul_nb_sequence += pseq[j]->nb_sequence;
+    }
+
+    // copy of sequence lengths
+
+    ilength = new int[inb_sequence];
+
+    i = 0;
+    for (j = 0;j < nb_sample;j++) {
+      for (k = 0;k < pseq[j]->nb_sequence;k++) {
+        ilength[i++] = pseq[j]->length[k];
+      }
+    }
+
+    // comparison of vertex identifiers: kept only if all the samples have
+    // vertex identifiers and if no vertex identifier is shared with
+    // a preceding sample
+
+    for (i = 0;i < nb_sample;i++) {
+      if (!(pseq[i]->vertex_identifier)) {
+        break;
+      }
+    }
+
+    if (i == nb_sample) {
+      ivertex_identifier = new int*[inb_sequence];
+
+      cumul_nb_sequence = 0;
+      i = 0;
+      for (j = 0;j < nb_sample;j++) {
+        for (k = 0;k < pseq[j]->nb_sequence;k++) {
+          ivertex_identifier[i] = new int[pseq[j]->length[k]];
+          for (m = 0;m < pseq[j]->length[k];m++) {
+            ivertex_identifier[i][m] = pseq[j]->vertex_identifier[k][m];
+
+            for (n = 0;n < cumul_nb_sequence;n++) {
+              for (p = 0;p < ilength[n];p++) {
+                if (ivertex_identifier[i][m] == ivertex_identifier[n][p]) {
+
+                  // duplicate: the rows 0..i allocated so far are freed
+
+                  for (q = 0;q <= i;q++) {
+                    delete [] ivertex_identifier[q];
+                  }
+                  delete [] ivertex_identifier;
+                  ivertex_identifier = NULL;
+                  break;
+                }
+              }
+
+              if (!ivertex_identifier) {
+                break;
+              }
+            }
+
+            if (!ivertex_identifier) {
+              break;
+            }
+          }
+
+          if (!ivertex_identifier) {
+            break;
+          }
+          i++;
+        }
+
+        if (!ivertex_identifier) {
+          break;
+        }
+        cumul_nb_sequence += pseq[j]->nb_sequence;
+      }
+    }
+
+    else {
+      ivertex_identifier = NULL;
+    }
+
+    seq = new MarkovianSequences(inb_sequence , iidentifier , ilength , ivertex_identifier ,
+                                 index_param_type , nb_variable , type);
+    delete [] iidentifier;
+    delete [] ilength;
+
+    if (ivertex_identifier) {
+      for (i = 0;i < inb_sequence;i++) {
+        delete [] ivertex_identifier[i];
+      }
+      delete [] ivertex_identifier;
+    }
+
+    // phisto: work array of pointers on the frequency distributions to be merged
+
+    phisto = new const FrequencyDistribution*[nb_sample];
+
+    // copy of index parameters
+
+    if (index_param_type == TIME) {
+      i = 0;
+      for (j = 0;j < nb_sample;j++) {
+        for (k = 0;k < pseq[j]->nb_sequence;k++) {
+          for (m = 0;m < pseq[j]->length[k];m++) {
+            seq->index_parameter[i][m] = pseq[j]->index_parameter[k][m];
+          }
+          i++;
+        }
+      }
+
+      for (i = 0;i < nb_sample;i++) {
+        phisto[i] = pseq[i]->index_parameter_distribution;
+      }
+      seq->index_parameter_distribution = new FrequencyDistribution(nb_sample , phisto);
+
+      for (i = 0;i < nb_sample;i++) {
+        phisto[i] = pseq[i]->index_interval;
+      }
+      seq->index_interval = new FrequencyDistribution(nb_sample , phisto);
+    }
+
+    // copy of values
+
+    i = 0;
+    for (j = 0;j < nb_sample;j++) {
+      for (k = 0;k < pseq[j]->nb_sequence;k++) {
+        for (m = 0;m < pseq[j]->nb_variable;m++) {
+          if ((pseq[j]->type[m] != REAL_VALUE) && (pseq[j]->type[m] != AUXILIARY)) {
+            for (n = 0;n < pseq[j]->length[k];n++) {
+              seq->int_sequence[i][m][n] = pseq[j]->int_sequence[k][m][n];
+            }
+          }
+
+          else {
+            for (n = 0;n < pseq[j]->length[k];n++) {
+              seq->real_sequence[i][m][n] = pseq[j]->real_sequence[k][m][n];
+            }
+          }
+        }
+        i++;
+      }
+    }
+
+    for (i = 0;i < seq->nb_variable;i++) {
+
+      // minimum and maximum values over the samples
+
+      seq->min_value[i] = pseq[0]->min_value[i];
+      seq->max_value[i] = pseq[0]->max_value[i];
+      for (j = 1;j < nb_sample;j++) {
+        if (pseq[j]->min_value[i] < seq->min_value[i]) {
+          seq->min_value[i] = pseq[j]->min_value[i];
+        }
+        if (pseq[j]->max_value[i] > seq->max_value[i]) {
+          seq->max_value[i] = pseq[j]->max_value[i];
+        }
+      }
+
+      if (seq->type[i] != AUXILIARY) {
+        if (seq->type[i] != REAL_VALUE) {
+          for (j = 0;j < nb_sample;j++) {
+            phisto[j] = pseq[j]->marginal_distribution[i];
+          }
+          seq->marginal_distribution[i] = new FrequencyDistribution(nb_sample , phisto);
+        }
+
+        else {
+          seq->build_marginal_histogram(i);
+        }
+
+        seq->min_interval[i] = pseq[0]->min_interval[i];
+        for (j = 1;j < nb_sample;j++) {
+          if (pseq[j]->min_interval[i] < seq->min_interval[i]) {
+            seq->min_interval[i] = pseq[j]->min_interval[i];
+          }
+        }
+
+        // the characteristics are merged only if they are available for all the samples
+
+        for (j = 0;j < nb_sample;j++) {
+          if (!(pseq[j]->characteristics[i])) {
+            break;
+          }
+        }
+
+        if (j == nb_sample) {
+          seq->characteristics[i] = new SequenceCharacteristics();
+
+          seq->characteristics[i]->nb_value = seq->marginal_distribution[i]->nb_value;
+
+          seq->build_index_value(i);
+
+          seq->characteristics[i]->first_occurrence = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+          seq->characteristics[i]->recurrence_time = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+
+          // for each value, only the samples whose marginal frequency
+          // distribution covers this value contribute
+
+          for (j = 0;j < seq->marginal_distribution[i]->nb_value;j++) {
+            nb_histo = 0;
+            for (k = 0;k < nb_sample;k++) {
+              if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                phisto[nb_histo++] = pseq[k]->characteristics[i]->first_occurrence[j];
+              }
+            }
+            seq->characteristics[i]->first_occurrence[j] = new FrequencyDistribution(nb_histo , phisto);
+
+            nb_histo = 0;
+            for (k = 0;k < nb_sample;k++) {
+              if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                phisto[nb_histo++] = pseq[k]->characteristics[i]->recurrence_time[j];
+              }
+            }
+            seq->characteristics[i]->recurrence_time[j] = new FrequencyDistribution(nb_histo , phisto);
+          }
+
+          // the sojourn time, initial run and final run frequency distributions are merged
+          // only if all the samples agree on the presence of initial run frequency distributions
+
+          for (j = 1;j < nb_sample;j++) {
+            if (((pseq[0]->characteristics[i]->initial_run) && (!(pseq[j]->characteristics[i]->initial_run))) ||
+                ((!(pseq[0]->characteristics[i]->initial_run)) && (pseq[j]->characteristics[i]->initial_run))) {
+              break;
+            }
+          }
+
+          if (j == nb_sample) {
+            seq->characteristics[i]->sojourn_time = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+            if (pseq[0]->characteristics[i]->initial_run) {
+              seq->characteristics[i]->initial_run = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+            }
+            seq->characteristics[i]->final_run = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+
+            for (j = 0;j < seq->marginal_distribution[i]->nb_value;j++) {
+              nb_histo = 0;
+              for (k = 0;k < nb_sample;k++) {
+                if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                  phisto[nb_histo++] = pseq[k]->characteristics[i]->sojourn_time[j];
+                }
+              }
+              seq->characteristics[i]->sojourn_time[j] = new FrequencyDistribution(nb_histo , phisto);
+
+              if (pseq[0]->characteristics[i]->initial_run) {
+                nb_histo = 0;
+                for (k = 0;k < nb_sample;k++) {
+                  if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                    phisto[nb_histo++] = pseq[k]->characteristics[i]->initial_run[j];
+                  }
+                }
+                seq->characteristics[i]->initial_run[j] = new FrequencyDistribution(nb_histo , phisto);
+              }
+
+              nb_histo = 0;
+              for (k = 0;k < nb_sample;k++) {
+                if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                  phisto[nb_histo++] = pseq[k]->characteristics[i]->final_run[j];
+                }
+              }
+              seq->characteristics[i]->final_run[j] = new FrequencyDistribution(nb_histo , phisto);
+            }
+          }
+
+          else {
+            seq->build_sojourn_time_frequency_distribution(i , (characteristics[i]->initial_run ? true : false));
+          }
+
+          // the frequency distributions of the number of runs/occurrences are merged
+          // only if no sample lacks both of them
+
+          for (j = 0;j < nb_sample;j++) {
+            if ((!(pseq[j]->characteristics[i]->nb_run)) && (!(pseq[j]->characteristics[i]->nb_occurrence))) {
+              break;
+            }
+          }
+
+          if (j == nb_sample) {
+            seq->characteristics[i]->nb_run = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+            seq->characteristics[i]->nb_occurrence = new FrequencyDistribution*[seq->marginal_distribution[i]->nb_value];
+
+            for (j = 0;j < seq->marginal_distribution[i]->nb_value;j++) {
+              nb_histo = 0;
+              for (k = 0;k < nb_sample;k++) {
+                if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                  phisto[nb_histo++] = pseq[k]->characteristics[i]->nb_run[j];
+                }
+              }
+              seq->characteristics[i]->nb_run[j] = new FrequencyDistribution(nb_histo , phisto);
+
+              nb_histo = 0;
+              for (k = 0;k < nb_sample;k++) {
+                if (j < pseq[k]->marginal_distribution[i]->nb_value) {
+                  phisto[nb_histo++] = pseq[k]->characteristics[i]->nb_occurrence[j];
+                }
+              }
+              seq->characteristics[i]->nb_occurrence[j] = new FrequencyDistribution(nb_histo , phisto);
+            }
+          }
+        }
+
+        // at least one sample without characteristics: the characteristics
+        // are computed from the merged sequences
+
+        else {
+          seq->build_characteristic(i , true , (((characteristics[i]) && (characteristics[i]->initial_run)) ? true : false));
+        }
+      }
+    }
+
+    delete [] pseq;
+    delete [] phisto;
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Merging of MarkovianSequences objects.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] nb_sample number of MarkovianSequences objects,
+ * \param[in] iseq pointer on the MarkovianSequences objects.
+ *
+ * \return MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::merge(StatError &error , int nb_sample ,
+                                              const vector<MarkovianSequences> &iseq) const
+
+{
+  int i;
+  MarkovianSequences *seq;
+  const MarkovianSequences **pseq;
+
+
+  // the pointer-based overload takes an array of pointers:
+  // each vector element is copied into a temporary object
+
+  pseq = new const MarkovianSequences*[nb_sample];
+  for (i = 0;i < nb_sample;i++) {
+    pseq[i] = new MarkovianSequences(iseq[i]);
+  }
+
+  seq = merge(error , nb_sample , pseq);
+
+  // release of the temporary copies and of the pointer array
+
+  for (i = 0;i < nb_sample;i++) {
+    delete pseq[i];
+  }
+  delete [] pseq;
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Clustering of values of a variable.
+ *
+ *  \param[in] error    reference on a StatError object,
+ *  \param[in] variable variable index (1-based),
+ *  \param[in] step     clustering step (must be >= 1),
+ *  \param[in] mode     mode (FLOOR/ROUND/CEIL).
+ *
+ *  \return             MarkovianSequences object (NULL if an argument is rejected).
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::cluster(StatError &error , int variable ,
+                                                int step , rounding mode) const
+
+{
+  bool status = true;
+  int i;
+  MarkovianSequences *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  if ((variable < 1) || (variable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    // from here on, variable is a 0-based index
+    variable--;
+
+    if ((type[variable] != INT_VALUE) && (type[variable] != REAL_VALUE)) {
+      status = false;
+      ostringstream correction_message;
+      correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE];
+      error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str());
+    }
+
+    // an integer-valued variable immediately followed by an auxiliary variable is rejected
+
+    if ((type[variable] == INT_VALUE) && (variable + 1 < nb_variable) &&
+        (type[variable + 1] == AUXILIARY)) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      error.update((error_message.str()).c_str());
+    }
+  }
+
+  if (step < 1) {
+    status = false;
+    error.update(STAT_error[STATR_CLUSTERING_STEP]);
+  }
+
+  if (status) {
+    seq = new MarkovianSequences(*this , variable , type[variable]);
+    seq->Sequences::cluster(*this , variable , step , mode);
+
+    for (i = 0;i < seq->nb_variable;i++) {
+
+      // clustered variable: minimum interval and characteristics are recomputed,
+      // initial runs being built only if the source variable had them
+
+      if (i == variable) {
+        seq->min_interval_computation(i);
+        seq->build_characteristic(i , true , (((characteristics[i]) && (characteristics[i]->initial_run)) ? true : false));
+      }
+
+      // other variables: minimum interval and characteristics are copied
+
+      else {
+        seq->min_interval[i] = min_interval[i];
+        if (characteristics[i]) {
+          seq->characteristics[i] = new SequenceCharacteristics(*(characteristics[i]));
+        }
+      }
+    }
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Transcoding of categories of an integer-valued variable.
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] ivariable    variable index,
+ *  \param[in] category     transcoding table,
+ *  \param[in] add_variable flag for adding a variable.
+ *
+ *  \return                 MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::transcode(StatError &error , int ivariable ,
+                                                  int *category , bool add_variable) const
+
+{
+  bool status = true , *presence;
+  int i;
+  int variable , offset , min_category , max_category;
+  variable_nature *itype;
+  MarkovianSequences *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  if ((ivariable < 1) || (ivariable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    // from here on, ivariable is a 0-based index
+    ivariable--;
+
+    if ((type[ivariable] != INT_VALUE) && (type[ivariable] != STATE)) {
+      status = false;
+      ostringstream correction_message;
+      correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE];
+      error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str());
+    }
+
+    // a variable immediately followed by an auxiliary variable is rejected
+
+    if ((ivariable + 1 < nb_variable) && (type[ivariable + 1] == AUXILIARY)) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << ivariable + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      error.update((error_message.str()).c_str());
+    }
+
+    // range check of the transcoding table (one entry per value between
+    // the offset and the last value of the marginal distribution)
+
+    if (status) {
+      min_category = marginal_distribution[ivariable]->nb_value;
+      max_category = 0;
+
+      for (i = 0;i < marginal_distribution[ivariable]->nb_value - marginal_distribution[ivariable]->offset;i++) {
+        if ((category[i] < 0) || (category[i] >= (add_variable ? marginal_distribution[ivariable]->nb_value - 1 : marginal_distribution[ivariable]->nb_value))) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_CATEGORY] << " " << category[i] << " "
+                        << STAT_error[STATR_NOT_ALLOWED];
+          error.update((error_message.str()).c_str());
+        }
+
+        else {
+          if (category[i] < min_category) {
+            min_category = category[i];
+          }
+          if (category[i] > max_category) {
+            max_category = category[i];
+          }
+        }
+      }
+
+      // the new categories must start at 0 and there must be at least 2 of them
+
+      if ((min_category != 0) || (max_category == 0)) {
+        status = false;
+        error.update(STAT_error[STATR_NB_CATEGORY]);
+      }
+    }
+
+    // every category between 0 and max_category must be used at least once
+
+    if (status) {
+      presence = new bool[max_category + 1];
+      for (i = 0;i <= max_category;i++) {
+        presence[i] = false;
+      }
+
+      for (i = 0;i < marginal_distribution[ivariable]->nb_value - marginal_distribution[ivariable]->offset;i++) {
+        presence[category[i]] = true;
+      }
+
+      for (i = 0;i <= max_category;i++) {
+        if (!presence[i]) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_error[STATR_MISSING_CATEGORY] << " " << i;
+          error.update((error_message.str()).c_str());
+        }
+      }
+
+      delete [] presence;
+    }
+
+    if (status) {
+
+      // added variable: the transcoded variable becomes variable 0 and
+      // the original variables are shifted by 1, otherwise replacement in place
+
+      if (add_variable) {
+        variable = 0;
+        offset = 1;
+      }
+      else {
+        variable = ivariable;
+        offset = 0;
+      }
+
+      itype = new variable_nature[nb_variable + offset];
+      for (i = 0;i < nb_variable;i++) {
+        itype[i + offset] = type[i];
+      }
+      itype[variable] = INT_VALUE;
+
+      seq = new MarkovianSequences(nb_sequence , identifier , length , vertex_identifier ,
+                                   index_param_type , nb_variable + offset , itype);
+      delete [] itype;
+
+      seq->Sequences::transcode(*this , ivariable , 0 , max_category , category , add_variable);
+
+      for (i = 0;i < seq->nb_variable;i++) {
+
+        // transcoded variable: minimum interval and characteristics are recomputed
+
+        if (i == variable) {
+          seq->min_interval_computation(i);
+          seq->build_characteristic(i , true , (((characteristics[ivariable]) && (characteristics[ivariable]->initial_run)) ? true : false));
+        }
+
+        // other variables: copy from the corresponding source variable
+
+        else {
+          seq->min_interval[i] = min_interval[i - offset];
+          if (characteristics[i - offset]) {
+            seq->characteristics[i] = new SequenceCharacteristics(*(characteristics[i - offset]));
+          }
+        }
+      }
+    }
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Transcoding of categories of an integer-valued variable.
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] ivariable    variable index,
+ *  \param[in] category     transcoding table,
+ *  \param[in] add_variable flag for adding a variable.
+ *
+ *  \return                 MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::transcode(StatError &error , int ivariable ,
+                                                  vector<int> &category , bool add_variable) const
+
+{
+  // forwarding to the pointer-based overload on the vector storage
+  return transcode(error , ivariable , category.data() , add_variable);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Transcoding of categories of an integer-valued variable.
+ *
+ *  \param[in] error   reference on a StatError object,
+ *  \param[in] process reference on a CategoricalSequenceProcess object.
+ *
+ *  \return            MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::transcode(StatError &error ,
+                                                  const CategoricalSequenceProcess *process) const
+
+{
+  int i , j;
+  int *category;
+  MarkovianSequences *seq;
+
+
+  // each value is assigned the index of the last state whose
+  // observation distribution gives it a strictly positive mass
+  // NOTE(review): an entry stays unset if no state gives the value a positive mass -
+  // presumably the observation distributions never leave a value uncovered; confirm
+
+  category = new int[process->nb_value];
+  for (i = 0;i < process->nb_state;i++) {
+    for (j = 0;j < process->nb_value;j++) {
+      if (process->observation[i]->mass[j] > 0.) {
+        category[j] = i;
+      }
+    }
+  }
+
+  // transcoding of the 1st variable with addition of a variable
+
+  seq = transcode(error , 1 , category , true);
+  delete [] category;
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Removing of the non-represented values of an integer-valued variable.
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the non-represented values, + * \param[in] ivariable variable index, + * \param[in] add_variable flag for adding a variable. + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* MarkovianSequences::consecutive_values(StatError &error , ostream *os , + int ivariable , bool add_variable) const + +{ + bool status = true; + int i , j; + int variable , offset , max , *category; + variable_nature *itype; + MarkovianSequences *seq; + + + seq = NULL; + error.init(); + + if ((ivariable < 1) || (ivariable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + ivariable--; + + if ((type[ivariable] != INT_VALUE) && (type[ivariable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + for (i = 0;i < marginal_distribution[ivariable]->nb_value;i++) { + if (marginal_distribution[ivariable]->frequency[i] == 0) { + break; + } + } + + if (i == marginal_distribution[ivariable]->nb_value) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << ivariable + 1 << ": " + << SEQ_error[SEQR_CONSECUTIVE_VALUES]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + if (os) { + *os << "\n" << SEQ_label[SEQL_MISSING_VALUE] << ":"; + for (i = 0;i < marginal_distribution[ivariable]->nb_value;i++) { + if (marginal_distribution[ivariable]->frequency[i] == 0) { + *os << " " << i; + } + } + *os << endl; + } + + category = new int[marginal_distribution[ivariable]->nb_value - marginal_distribution[ivariable]->offset]; + +// i = 0; + i = -1; + for (j = 
marginal_distribution[ivariable]->offset;j < marginal_distribution[ivariable]->nb_value;j++) { +// category[j - marginal_distribution[ivariable]->offset] = i; + if (marginal_distribution[ivariable]->frequency[j] > 0) { + i++; + } + category[j - marginal_distribution[ivariable]->offset] = i; + } +// max = i - 1; + max = i; + +# ifdef DEBUG + cout << "\nTest :"; + for (i = 0;i < marginal_distribution[ivariable]->nb_value - marginal_distribution[ivariable]->offset;i++) { + cout << " " << category[i]; + } + cout << endl; +# endif + + if (add_variable) { + variable = 0; + offset = 1; + } + else { + variable = ivariable; + offset = 0; + } + + itype = new variable_nature[nb_variable + offset]; + for (i = 0;i < nb_variable;i++) { + itype[i + offset] = type[i]; + } + itype[variable] = INT_VALUE; + + seq = new MarkovianSequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , nb_variable + offset , itype); + delete [] itype; + + seq->Sequences::transcode(*this , ivariable , 0 , max , category , add_variable); + delete [] category; + + for (i = 0;i < seq->nb_variable;i++) { + if (i == variable) { + seq->min_interval_computation(i); + seq->build_characteristic(i , true , (((characteristics[ivariable]) && (characteristics[ivariable]->initial_run)) ? true : false)); + } + + else if (characteristics[i - offset]) { + seq->min_interval[i] = min_interval[i - offset]; + seq->characteristics[i] = new SequenceCharacteristics(*(characteristics[i - offset])); + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of an integer-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] ivariable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit integer limits between classes (beginning of classes), + * \param[in] add_variable flag for adding a variable. + * + * \return MarkovianSequences object. 
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::cluster(StatError &error , int ivariable , int nb_class ,
+                                                int *ilimit , bool add_variable) const
+
+{
+  bool status = true;
+  int i , j , k;
+  int variable , offset , *category , *limit;
+  variable_nature *itype;
+  MarkovianSequences *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  if ((ivariable < 1) || (ivariable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    // from here on, ivariable is a 0-based index
+    ivariable--;
+
+    if ((type[ivariable] != INT_VALUE) && (type[ivariable] != STATE)) {
+      status = false;
+      ostringstream correction_message;
+      correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE];
+      error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str());
+    }
+
+    // the number of classes is only checked once the variable type is accepted
+
+    else if ((nb_class < 2) || (nb_class >= marginal_distribution[ivariable]->nb_value)) {
+      status = false;
+      error.update(STAT_error[STATR_NB_CLASS]);
+    }
+
+    if ((ivariable + 1 < nb_variable) && (type[ivariable + 1] == AUXILIARY)) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << ivariable + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      error.update((error_message.str()).c_str());
+    }
+  }
+
+  if (status) {
+
+    // class limits: the nb_class - 1 limits read from ilimit are framed by
+    // the offset and the number of values of the marginal distribution
+
+    limit = new int[nb_class + 1];
+    limit[0] = marginal_distribution[ivariable]->offset;
+    for (i = 1;i < nb_class;i++) {
+      limit[i] = ilimit[i - 1];
+    }
+    limit[nb_class] = marginal_distribution[ivariable]->nb_value;
+
+    // the limits must be strictly increasing
+
+    for (i = 1;i <= nb_class;i++) {
+      if (limit[i] <= limit[i - 1]) {
+        status = false;
+        error.update(STAT_error[STATR_CLUSTER_LIMIT]);
+      }
+    }
+
+    if (status) {
+
+      // transcoding table: values in [limit[j] , limit[j + 1]) are assigned to class j
+
+      category = new int[marginal_distribution[ivariable]->nb_value];
+
+      i = 0;
+      for (j = 0;j < nb_class;j++) {
+        for (k = limit[j];k < limit[j + 1];k++) {
+          category[i++] = j;
+        }
+      }
+
+      if (add_variable) {
+        variable = 0;
+        offset = 1;
+      }
+      else {
+        variable = ivariable;
+        offset = 0;
+      }
+
+      itype = new variable_nature[nb_variable + offset];
+      for (i = 0;i < nb_variable;i++) {
+        itype[i + offset] = type[i];
+      }
+      itype[variable] = INT_VALUE;
+
+      seq = new MarkovianSequences(nb_sequence , identifier , length , vertex_identifier ,
+                                   index_param_type , nb_variable + offset , itype);
+      delete [] itype;
+
+      seq->Sequences::transcode(*this , ivariable , 0 , nb_class - 1 , category , add_variable);
+      delete [] category;
+
+      for (i = 0;i < seq->nb_variable;i++) {
+        if (i == variable) {
+          seq->min_interval_computation(i);
+          seq->build_characteristic(i , true , (((characteristics[ivariable]) && (characteristics[ivariable]->initial_run)) ? true : false));
+        }
+        else {
+          seq->min_interval[i] = min_interval[i - offset];
+          if (characteristics[i - offset]) {
+            seq->characteristics[i] = new SequenceCharacteristics(*(characteristics[i - offset]));
+          }
+        }
+      }
+    }
+
+    delete [] limit;
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Partitioning of values of an integer-valued variable.
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] ivariable    variable index,
+ *  \param[in] nb_class     number of classes,
+ *  \param[in] ilimit       integer limits between classes (beginning of classes),
+ *  \param[in] add_variable flag for adding a variable.
+ *
+ *  \return                 MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::cluster(StatError &error , int ivariable , int nb_class ,
+                                                vector<int> &ilimit , bool add_variable) const
+
+{
+  // forwarding to the pointer-based overload on the vector storage
+  return cluster(error , ivariable , nb_class , ilimit.data() , add_variable);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Partitioning of values of a real-valued variable.
+ * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit real limits between classes (beginning of classes). + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* MarkovianSequences::cluster(StatError &error , int variable , + int nb_class , double *ilimit) const + +{ + bool status = true; + int i; + double *limit; + MarkovianSequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] != REAL_VALUE) { + status = false; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , STAT_variable_word[REAL_VALUE]); + } + + if ((variable + 1 < nb_variable) && (type[variable + 1] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + } + + if (nb_class < 2) { + status = false; + error.update(STAT_error[STATR_NB_CLASS]); + } + + if (status) { + limit = new double[nb_class + 1]; + limit[0] = min_value[variable]; + for (i = 1;i < nb_class;i++) { + limit[i] = ilimit[i - 1]; + } + limit[nb_class] = max_value[variable] + DOUBLE_ERROR; + + for (i = 0;i < nb_class;i++) { + if (limit[i] >= limit[i + 1]) { + status = false; + error.update(STAT_error[STATR_CLUSTER_LIMIT]); + } + } + + if (status) { + seq = new MarkovianSequences(*this , variable , type[variable]); + seq->Sequences::cluster(*this , variable , nb_class , limit); + + for (i = 0;i < seq->nb_variable;i++) { + if (i == variable) { + seq->min_interval_computation(i); + seq->build_characteristic(i); + } + + else { + seq->min_interval[i] = min_interval[i]; + if (characteristics[i]) { + 
seq->characteristics[i] = new SequenceCharacteristics(*(characteristics[i])); + } + } + } + } + + delete [] limit; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of a real-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit real limits between classes (beginning of classes). + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* MarkovianSequences::cluster(StatError &error , int variable , + int nb_class , vector &ilimit) const + +{ + return cluster(error , variable , nb_class , ilimit.data()); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a MarkovianSequences object transforming the implicit index parameters in + * explicit index parameters. + * + * \param[in] error reference on a StatError object. + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* MarkovianSequences::explicit_index_parameter(StatError &error) const + +{ + MarkovianSequences *seq; + + + error.init(); + + if (index_parameter) { + seq = NULL; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + else { + seq = new MarkovianSequences(*this , EXPLICIT_INDEX_PARAMETER); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Removing of the index parameters. + * + * \param[in] error reference on a StatError object. + * + * \return MarkovianSequences object. 
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::remove_index_parameter(StatError &error) const
+
+{
+  MarkovianSequences *seq;
+
+
+  error.init();
+
+  // rejected if there are no stored index parameters to remove
+
+  if (!index_parameter) {
+    seq = NULL;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  }
+  else {
+    seq = new MarkovianSequences(*this , REMOVE_INDEX_PARAMETER);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Selection of variables.
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] inb_variable number of variables,
+ *  \param[in] ivariable    variable indices (1-based),
+ *  \param[in] keep         flag for keeping or rejecting the selected variables.
+ *
+ *  \return                 MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::select_variable(StatError &error , int inb_variable ,
+                                                        int *ivariable , bool keep) const
+
+{
+  bool status = true , *selected_variable;
+  int i;
+  int bnb_variable , *variable;
+  variable_nature *itype;
+  MarkovianSequences *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  // when rejecting variables, at least one variable must remain
+
+  if ((inb_variable < 1) || (inb_variable > (keep ? nb_variable : nb_variable - 1))) {
+    status = false;
+    error.update(STAT_error[STATR_NB_SELECTED_VARIABLE]);
+  }
+
+  else {
+
+    // selected_variable is indexed by the 1-based variable indices (entry 0 unused)
+
+    selected_variable = new bool[nb_variable + 1];
+    for (i = 1;i <= nb_variable;i++) {
+      selected_variable[i] = false;
+    }
+
+    for (i = 0;i < inb_variable;i++) {
+      if ((ivariable[i] < 1) || (ivariable[i] > nb_variable)) {
+        status = false;
+        ostringstream error_message;
+        error_message << ivariable[i] << ": " << STAT_error[STATR_VARIABLE_INDEX];
+        error.update((error_message.str()).c_str());
+      }
+
+      else if (selected_variable[ivariable[i]]) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_VARIABLE] << " " << ivariable[i] << " "
+                      << STAT_error[STATR_ALREADY_SELECTED];
+        error.update((error_message.str()).c_str());
+      }
+      else {
+        selected_variable[ivariable[i]] = true;
+      }
+    }
+
+    delete [] selected_variable;
+  }
+
+  if (status) {
+    variable = ::select_variable(nb_variable , inb_variable , ivariable , keep);
+
+    bnb_variable = (keep ? inb_variable : nb_variable - inb_variable);
+
+    // an auxiliary variable can only be retained directly after
+    // the variable that precedes it in this object
+
+    for (i = 0;i < bnb_variable;i++) {
+      if ((type[variable[i]] == AUXILIARY) &&
+          ((i == 0) || (variable[i - 1] != variable[i] - 1))) {
+        status = false;
+        error.update(SEQ_error[SEQR_VARIABLE_INDICES]);
+      }
+    }
+
+    if (status) {
+      itype = new variable_nature[bnb_variable];
+      for (i = 0;i < bnb_variable;i++) {
+        itype[i] = type[variable[i]];
+      }
+
+      seq = new MarkovianSequences(nb_sequence , identifier , length , vertex_identifier ,
+                                   index_param_type , bnb_variable , itype);
+
+      seq->Sequences::select_variable(*this , variable);
+
+      // copy of the minimum intervals and characteristics of the retained variables
+
+      for (i = 0;i < seq->nb_variable;i++) {
+        seq->min_interval[i] = min_interval[variable[i]];
+        if (characteristics[variable[i]]) {
+          seq->characteristics[i] = new SequenceCharacteristics(*(characteristics[variable[i]]));
+        }
+      }
+
+      delete [] itype;
+    }
+
+    delete [] variable;
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Selection of variables.
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] inb_variable number of variables,
+ *  \param[in] ivariable    variable indices,
+ *  \param[in] keep         flag for keeping or rejecting the selected variables.
+ *
+ *  \return                 MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::select_variable(StatError &error , int inb_variable ,
+                                                        vector<int> &ivariable , bool keep) const
+
+{
+  // forwarding to the pointer-based overload on the vector storage
+  return select_variable(error , inb_variable , ivariable.data() , keep);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Removing of the 1st variable.
+ *
+ *  \return MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::remove_variable_1() const
+
+{
+  int dst , src;
+  int *kept_variable = new int[nb_variable - 1];
+  variable_nature *kept_type = new variable_nature[nb_variable - 1];
+  MarkovianSequences *res;
+
+
+  // every variable but the 1st one is retained, in the original order
+
+  for (src = 1;src < nb_variable;src++) {
+    kept_variable[src - 1] = src;
+    kept_type[src - 1] = type[src];
+  }
+
+  res = new MarkovianSequences(nb_sequence , identifier , length , vertex_identifier ,
+                               index_param_type , nb_variable - 1 , kept_type);
+
+  res->Sequences::select_variable(*this , kept_variable);
+
+  // variable dst of the result corresponds to variable dst + 1 of this object
+
+  for (dst = 0;dst < res->nb_variable;dst++) {
+    src = dst + 1;
+    res->min_interval[dst] = min_interval[src];
+    if (characteristics[src]) {
+      res->characteristics[dst] = new SequenceCharacteristics(*(characteristics[src]));
+    }
+  }
+
+  delete [] kept_variable;
+  delete [] kept_type;
+
+  return res;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Merging of variables of MarkovianSequences objects.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] nb_sample  number of MarkovianSequences objects,
+ *  \param[in] iseq       pointer on the MarkovianSequences objects,
+ *  \param[in] ref_sample reference MarkovianSequences object for the identifiers.
+ *
+ *  \return               MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::merge_variable(StatError &error , int nb_sample ,
+                                                       const MarkovianSequences **iseq , int ref_sample) const
+
+{
+  bool status = true;
+  int i , j , k , m;
+  int inb_variable , *iidentifier , **ivertex_identifier;
+  variable_nature *itype;
+  MarkovianSequences *seq;
+  const MarkovianSequences **pseq;
+
+
+  seq = NULL;
+  error.init();
+
+  // compatibility checks of each object of iseq with this object
+  // (reported as samples 2, 3, ..., this object being sample 1)
+
+  for (i = 0;i < nb_sample;i++) {
+
+    // index parameter type: implicit or identical to the type of this object
+
+    if ((iseq[i]->index_param_type != IMPLICIT_TYPE) &&
+        (iseq[i]->index_param_type != index_param_type)) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                    << SEQ_error[SEQR_INDEX_PARAMETER_TYPE];
+
+      if (index_param_type == IMPLICIT_TYPE) {
+        error.update((error_message.str()).c_str());
+      }
+      else {
+        error.correction_update((error_message.str()).c_str() , SEQ_index_parameter_word[index_param_type]);
+      }
+    }
+
+    // same number of sequences
+
+    if (iseq[i]->nb_sequence != nb_sequence) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                    << SEQ_error[SEQR_NB_SEQUENCE];
+      error.update((error_message.str()).c_str());
+    }
+
+    else {
+      for (j = 0;j < nb_sequence;j++) {
+
+        // same sequence lengths
+
+        if (iseq[i]->length[j] != length[j]) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                        << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << ": "
+                        << SEQ_error[SEQR_SEQUENCE_LENGTH];
+          error.update((error_message.str()).c_str());
+        }
+
+        // same index parameters when both objects are of TIME index parameter type
+
+        else if ((iseq[i]->index_param_type == TIME) &&
+                 (iseq[i]->index_param_type == index_param_type)) {
+          for (k = 0;k < length[j];k++) {
+            if (iseq[i]->index_parameter[j][k] != index_parameter[j][k]) {
+              status = false;
+              ostringstream error_message;
+              error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": "
+                            << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << ": "
+                            << SEQ_label[SEQL_INDEX] << " " << k << ": "
+                            << SEQ_error[SEQR_INDEX_PARAMETER];
+              error.update((error_message.str()).c_str());
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // ref_sample is 1-based and may also designate this object, hence the nb_sample + 1 upper bound
+
+  if ((ref_sample != I_DEFAULT) && ((ref_sample < 1) || (ref_sample > nb_sample + 1))) {
+    status = false;
+    error.update(STAT_error[STATR_SAMPLE_INDEX]);
+  }
+
+  if (status) {
+
+    // pseq gathers this object (rank 0) followed by the objects of iseq;
+    // nb_sample now counts this object as well
+
+    nb_sample++;
+    pseq = new const MarkovianSequences*[nb_sample];
+
+    pseq[0] = this;
+    inb_variable = nb_variable;
+    for (i = 1;i < nb_sample;i++) {
+      pseq[i] = iseq[i - 1];
+      inb_variable += iseq[i - 1]->nb_variable;
+    }
+
+    // comparison of sequence identifiers: without reference object, the identifiers of
+    // this object are kept only if all the objects share them (NULL otherwise)
+
+    if (ref_sample == I_DEFAULT) {
+      for (i = 0;i < nb_sequence;i++) {
+        for (j = 1;j < nb_sample;j++) {
+          if (pseq[j]->identifier[i] != pseq[0]->identifier[i]) {
+            break;
+          }
+        }
+        if (j < nb_sample) {
+          break;
+        }
+      }
+
+      if (i < nb_sequence) {
+        iidentifier = NULL;
+      }
+      else {
+        iidentifier = pseq[0]->identifier;
+      }
+    }
+
+    else {
+      // ref_sample becomes a 0-based rank in pseq
+      ref_sample--;
+      iidentifier = pseq[ref_sample]->identifier;
+    }
+
+    // comparison of vertex identifiers: without reference object, the vertex identifiers of
+    // this object are kept only if every object has vertex identifiers and all of them are identical
+
+    if (ref_sample == I_DEFAULT) {
+      for (i = 0;i < nb_sample;i++) {
+        if (!(pseq[i]->vertex_identifier)) {
+          break;
+        }
+      }
+
+      if (i == nb_sample) {
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 1;j < nb_sample;j++) {
+            for (k = 0;k < pseq[j]->length[i];k++) {
+              if (pseq[j]->vertex_identifier[i][k] != pseq[0]->vertex_identifier[i][k]) {
+                break;
+              }
+            }
+
+            if (k < pseq[j]->length[i]) {
+              break;
+            }
+          }
+
+          if (j < nb_sample) {
+            break;
+          }
+        }
+
+        if (i < nb_sequence) {
+          ivertex_identifier = NULL;
+        }
+        else {
+          ivertex_identifier = pseq[0]->vertex_identifier;
+        }
+      }
+
+      else {
+        ivertex_identifier = NULL;
+      }
+    }
+
+    else {
+      ivertex_identifier = pseq[ref_sample]->vertex_identifier;
+    }
+
+    // variable types of the merged object: a STATE variable is only kept
+    // as such in 1st position and becomes INT_VALUE elsewhere
+
+    itype = new variable_nature[inb_variable];
+    inb_variable = 0;
+    for (i = 0;i < nb_sample;i++) {
+      for (j = 0;j < pseq[i]->nb_variable;j++) {
+        itype[inb_variable] = pseq[i]->type[j];
+        if ((inb_variable > 0) && (itype[inb_variable] == STATE)) {
+          itype[inb_variable] = INT_VALUE;
+        }
+        inb_variable++;
+      }
+    }
+
+    seq = new MarkovianSequences(nb_sequence , iidentifier , length , ivertex_identifier ,
+                                 index_param_type , inb_variable , itype);
+    delete [] itype;
+
+    // copy of index parameters (taken from this object)
+
+    if (index_parameter_distribution) {
+      seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution);
+    }
+    if (index_interval) {
+      seq->index_interval = new FrequencyDistribution(*index_interval);
+    }
+
+    if (index_parameter) {
+      for (i = 0;i < nb_sequence;i++) {
+        for (j = 0;j < length[i];j++) {
+          seq->index_parameter[i][j] = index_parameter[i][j];
+        }
+      }
+    }
+
+    // copy of values: inb_variable is the rank of the current variable in the merged object
+
+    for (i = 0;i < nb_sequence;i++) {
+      inb_variable = 0;
+      for (j = 0;j < nb_sample;j++) {
+        for (k = 0;k < pseq[j]->nb_variable;k++) {
+          if ((seq->type[inb_variable] != REAL_VALUE) && (seq->type[inb_variable] != AUXILIARY)) {
+            for (m = 0;m < length[i];m++) {
+              seq->int_sequence[i][inb_variable][m] = pseq[j]->int_sequence[i][k][m];
+            }
+          }
+
+          else {
+            for (m = 0;m < length[i];m++) {
+              seq->real_sequence[i][inb_variable][m] = pseq[j]->real_sequence[i][k][m];
+            }
+          }
+
+          inb_variable++;
+        }
+      }
+    }
+
+    // copy of the per-variable summaries (extreme values, marginal distribution or histogram,
+    // minimum interval, characteristics) from the source objects
+
+    inb_variable = 0;
+    for (i = 0;i < nb_sample;i++) {
+      for (j = 0;j < pseq[i]->nb_variable;j++) {
+        seq->min_value[inb_variable] = pseq[i]->min_value[j];
+        seq->max_value[inb_variable] = pseq[i]->max_value[j];
+
+        if (pseq[i]->marginal_distribution[j]) {
+          seq->marginal_distribution[inb_variable] = new FrequencyDistribution(*(pseq[i]->marginal_distribution[j]));
+        }
+        if (pseq[i]->marginal_histogram[j]) {
+          seq->marginal_histogram[inb_variable] = new Histogram(*(pseq[i]->marginal_histogram[j]));
+        }
+
+        seq->min_interval[inb_variable] = pseq[i]->min_interval[j];
+
+        if (pseq[i]->characteristics[j]) {
+          seq->characteristics[inb_variable] = new SequenceCharacteristics(*(pseq[i]->characteristics[j]));
+        }
+
+        inb_variable++;
+      }
+    }
+
+    // only the pointer array is released: the pointed objects are not owned by this function
+
+    delete [] pseq;
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Merging of variables of MarkovianSequences objects. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sample number of MarkovianSequences objects, + * \param[in] iseq pointer on the MarkovianSequences objects, + * \param[in] ref_sample reference MarkovianSequences object for the identifiers. + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* MarkovianSequences::merge_variable(StatError &error , int nb_sample , + const vector &iseq , int ref_sample) const + +{ + int i; + MarkovianSequences *seq; + const MarkovianSequences **pseq; + + + pseq = new const MarkovianSequences*[nb_sample]; + for (i = 0;i < nb_sample;i++) { + pseq[i] = new MarkovianSequences(iseq[i]); + } + + seq = merge_variable(error , nb_sample , pseq , ref_sample); + + for (i = 0;i < nb_sample;i++) { + delete pseq[i]; + } + delete [] pseq; + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of sequences with extraction of the initial runs. + * + * \param[in] error reference on a StatError object. + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + + MarkovianSequences* MarkovianSequences::initial_run_computation(StatError &error) const + +{ + int i; + MarkovianSequences *seq; + + + error.init(); + + for (i = 0;i < nb_variable;i++) { + if ((characteristics[i]) && (characteristics[i]->initial_run)) { + break; + } + } + + if (i < nb_variable) { + seq = NULL; + error.update(SEQ_error[SEQR_INITIAL_RUN_ALREADY_BUILT]); + } + + else { + seq = new MarkovianSequences(*this , SEQUENCE_COPY , ADD_INITIAL_RUN); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Addition of an absorbing run at the end of each sequence. 
+ *
+ *  \param[in] error           reference on a StatError object,
+ *  \param[in] run_length      absorbing run length,
+ *  \param[in] sequence_length sequence length,
+ *  \param[in] add_variable    flag for adding a binary variable (0: data, 1: end absorbing run).
+ *
+ *  \return                    MarkovianSequences object.
+ *
+ *  Details visible in the implementation:
+ *  - run_length, when given, must lie in [1, MAX_ABSORBING_RUN_LENGTH]; every
+ *    sequence is then extended by run_length and sequence_length is only validated;
+ *  - when run_length is I_DEFAULT, every sequence is padded to a common length
+ *    (sequence_length, or max_length + ABSORBING_RUN_LENGTH by default);
+ *  - integer-valued variables are padded with a constant value outside the observed
+ *    range, real-valued variables with randomly generated values;
+ *  - NULL is returned (and error updated) when an argument is rejected.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::add_absorbing_run(StatError &error , int run_length ,
+                                                          int sequence_length , bool add_variable) const
+
+{
+  bool status = true , initial_run_flag;
+  int i , j , k;
+  int inb_variable , end_value , *ilength;
+  double mean , variance , limit , *standard_deviation;
+  variable_nature *itype;
+  MarkovianSequences *seq;
+
+
+  seq = NULL;
+  error.init();
+
+/*  if (index_param_type == TIME) {
+    status = false;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  } */
+
+  if ((run_length != I_DEFAULT) && ((run_length < 1) ||
+       (run_length > MAX_ABSORBING_RUN_LENGTH))) {
+    status = false;
+    error.update(SEQ_error[SEQR_RUN_LENGTH]);
+  }
+
+  if ((sequence_length != I_DEFAULT) && ((sequence_length <= max_length) ||
+       (sequence_length > max_length + MAX_ABSORBING_RUN_LENGTH))) {
+    status = false;
+    error.update(SEQ_error[SEQR_SEQUENCE_LENGTH]);
+  }
+
+  if (status) {
+    if (add_variable) {
+      inb_variable = nb_variable + 1;  // one extra indicator variable appended last
+    }
+    else {
+      inb_variable = nb_variable;
+    }
+
+    itype = new variable_nature[inb_variable];
+    for (i = 0;i < nb_variable;i++) {
+      itype[i] = type[i];
+    }
+    if (add_variable) {
+      itype[nb_variable] = INT_VALUE;
+    }
+
+    ilength = new int[nb_sequence];  // lengths of the extended sequences
+
+    if (run_length == I_DEFAULT) {
+      if (sequence_length == I_DEFAULT) {
+        sequence_length = max_length + ABSORBING_RUN_LENGTH;
+      }
+
+      for (i = 0;i < nb_sequence;i++) {
+        ilength[i] = sequence_length;  // common target length for all sequences
+      }
+    }
+
+    else {
+      for (i = 0;i < nb_sequence;i++) {
+        ilength[i] = length[i] + run_length;  // fixed-size run appended to each sequence
+      }
+    }
+
+    seq = new MarkovianSequences(nb_sequence , identifier , ilength , vertex_identifier ,
+                                 index_param_type , inb_variable , itype , false);
+    delete [] itype;
+    delete [] ilength;
+
+    // copy of vertex identifiers
+
+    if (vertex_identifier) {
+      for (i = 0;i < seq->nb_sequence;i++) {
+        for (j = 0;j < length[i];j++) {
+          seq->vertex_identifier[i][j] = vertex_identifier[i][j];
+        }
+        for (j = length[i];j < seq->length[i];j++) {
+          seq->vertex_identifier[i][j] = I_DEFAULT;  // no vertex for the added positions
+        }
+      }
+    }
+
+    // copy of index parameters
+
+    if (index_parameter) {
+      for (i = 0;i < seq->nb_sequence;i++) {
+        for (j = 0;j < length[i];j++) {
+          seq->index_parameter[i][j] = index_parameter[i][j];
+        }
+        for (j = length[i];j < seq->length[i];j++) {
+          seq->index_parameter[i][j] = seq->index_parameter[i][j - 1] + 1;  // continues by unit steps; reads index j - 1, so assumes length[i] >= 1 -- TODO confirm
+        }
+      }
+
+      seq->build_index_parameter_frequency_distribution();
+      if (index_interval) {
+        seq->index_interval_computation();
+      }
+    }
+
+    standard_deviation = new double[nb_variable];  // only filled for REAL_VALUE / AUXILIARY variables
+
+    for (i = 0;i < nb_variable;i++) {
+      if ((type[i] == REAL_VALUE) || (type[i] == AUXILIARY)) {
+        mean = mean_computation(i);
+        variance = variance_computation(i , mean);
+        standard_deviation[i] = sqrt(variance) / ABSORBING_RUN_STANDARD_DEVIATION_FACTOR;
+      }
+    }
+
+    // copy of sequences with addition of an end absorbing run
+
+    for (i = 0;i < seq->nb_sequence;i++) {
+      for (j = 0;j < nb_variable;j++) {
+        if ((seq->type[j] != REAL_VALUE) && (seq->type[j] != AUXILIARY)) {
+          for (k = 0;k < length[i];k++) {
+            seq->int_sequence[i][j][k] = int_sequence[i][j][k];
+          }
+
+          if (min_value[j] > 0) {
+            end_value = 0;  // 0 is below the observed range
+          }
+          else {
+            end_value = max_value[j] + 1;  // first value above the observed range
+          }
+
+          for (k = length[i];k < seq->length[i];k++) {
+            seq->int_sequence[i][j][k] = end_value;
+          }
+
+#         ifdef DEBUG
+          if (run_length == 1) {  // for Fuji/Braeburn GUs
+            if ((j == 0) || (j == 1)) {
+              seq->int_sequence[i][j][seq->length[i] - 1] = seq->int_sequence[i][j][seq->length[i] - 2];
+            }
+            else if (j > 2) {
+              seq->int_sequence[i][j][seq->length[i] - 1] = 0;
+            }
+          }
+#         endif
+
+        }
+
+        else {
+          for (k = 0;k < length[i];k++) {
+            seq->real_sequence[i][j][k] = real_sequence[i][j][k];
+          }
+
+          // random generation of absorbing run values
+
+          if (min_value[j] >= 10 * standard_deviation[j]) {
+            normal dist(min_value[j] - 2 * standard_deviation[j] , standard_deviation[j]);  // centred below the observed minimum
+
+            for (k = length[i];k < seq->length[i];k++) {
+              limit = double(rand_unif(mt));
+              seq->real_sequence[i][j][k] = quantile(dist , limit);  // quantile of a random draw from rand_unif
+            }
+          }
+
+          else {
+            normal dist(max_value[j] + 2 * standard_deviation[j] , standard_deviation[j]);  // centred above the observed maximum
+
+            for (k = length[i];k < seq->length[i];k++) {
+              limit = double(rand_unif(mt));
+              seq->real_sequence[i][j][k] = quantile(dist , limit);
+            }
+          }
+
+/*          if (min_value[j] >= 5 * standard_deviation[j]) {
+            for (k = length[i];k < seq->length[i];k++) {
+              seq->real_sequence[i][j][k] = min_value[j] - (k % 2 + 4) * standard_deviation[j];
+            }
+          }
+
+          else {
+            for (k = length[i];k < seq->length[i];k++) {
+              seq->real_sequence[i][j][k] = max_value[j] + (k % 2 + 4) * standard_deviation[j];
+            }
+          } */
+        }
+      }
+
+      // addition of a binary variable
+
+      if (add_variable) {
+        for (j = 0;j < length[i];j++) {
+          seq->int_sequence[i][nb_variable][j] = 0;  // original data
+        }
+        for (j = length[i];j < seq->length[i];j++) {
+          seq->int_sequence[i][nb_variable][j] = 1;  // end absorbing run
+        }
+      }
+    }
+
+    delete [] standard_deviation;
+
+    for (i = 0;i < seq->nb_variable;i++) {
+      seq->min_value_computation(i);
+      seq->max_value_computation(i);
+
+      seq->build_marginal_frequency_distribution(i);
+
+      seq->min_interval_computation(i);
+    }
+
+    initial_run_flag = false;  // set when a source variable carries initial-run characteristics
+    for (i = 0;i < nb_variable;i++) {
+      if ((characteristics[i]) && (characteristics[i]->initial_run)) {
+        initial_run_flag = true;
+        break;
+      }
+    }
+
+    seq->build_characteristic(I_DEFAULT , true , initial_run_flag);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of auxiliary variables corresponding to
+ *         restored state sequences.
+ * + * \param[in] discrete_process pointer on DiscreteParametricProcess objects, + * \param[in] continuous_process pointer on ContinuousParametricProcess objects. + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* MarkovianSequences::build_auxiliary_variable(DiscreteParametricProcess **discrete_process , + ContinuousParametricProcess **continuous_process) const + +{ + bool *auxiliary; + int i , j , k , m; + int *pstate; + double *mean; + MarkovianSequences *seq; + + + auxiliary = new bool[nb_variable]; + + auxiliary[0] = false; + for (i = 1;i < nb_variable;i++) { + if (((discrete_process) && (discrete_process[i - 1])) || + ((continuous_process) && (continuous_process[i - 1]))) { + auxiliary[i] = true; + } + else { + auxiliary[i] = false; + } + } + + seq = new MarkovianSequences(*this , auxiliary); + + i = 0; + for (j = 1;j < nb_variable;j++) { + i++; + + if ((discrete_process) && (discrete_process[j - 1])) { + i++; + for (k = 0;k < nb_sequence;k++) { + pstate = seq->int_sequence[k][0]; + for (m = 0;m < length[k];m++) { + seq->real_sequence[k][i][m] = discrete_process[j - 1]->observation[*pstate++]->mean; + } + } + } + + else if ((continuous_process) && (continuous_process[j - 1])) { + i++; + + if ((continuous_process[j - 1]->ident == GAMMA) || (continuous_process[j - 1]->ident == ZERO_INFLATED_GAMMA) || + (continuous_process[j - 1]->ident == GAUSSIAN) || (continuous_process[j - 1]->ident == INVERSE_GAUSSIAN) || + (continuous_process[j - 1]->ident == VON_MISES)) { + mean = new double [continuous_process[j - 1]->nb_state]; + + switch (continuous_process[j - 1]->ident) { + + case GAMMA : { + for (k = 0;k < continuous_process[j - 1]->nb_state;k++) { + mean[k] = continuous_process[j - 1]->observation[k]->shape * + continuous_process[j - 1]->observation[k]->scale; + } + break; + } + + case ZERO_INFLATED_GAMMA : { + for (k = 0;k < continuous_process[j - 1]->nb_state;k++) { + if 
(continuous_process[j - 1]->observation[k]->zero_probability == 1.) { + mean[k] = 0.; + } + else { + mean[k] = (1 - continuous_process[j - 1]->observation[k]->zero_probability) * + continuous_process[j - 1]->observation[k]->shape * + continuous_process[j - 1]->observation[k]->scale; + } + } + break; + } + + default : { + for (k = 0;k < continuous_process[j - 1]->nb_state;k++) { + mean[k] = continuous_process[j - 1]->observation[k]->location; + } + break; + } + } + + for (k = 0;k < nb_sequence;k++) { + pstate = seq->int_sequence[k][0]; + for (m = 0;m < length[k];m++) { + seq->real_sequence[k][i][m] = mean[*pstate++]; + } + } + + delete [] mean; + } + + else if (continuous_process[j - 1]->ident == LINEAR_MODEL) { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (k = 0;k < nb_sequence;k++) { + pstate = seq->int_sequence[k][0]; + for (m = 0;m < length[k];m++) { + seq->real_sequence[k][i][m] = continuous_process[j - 1]->observation[*pstate]->intercept + + continuous_process[j - 1]->observation[*pstate]->slope * m; + pstate++; + } + } + break; + } + + case TIME : { + for (k = 0;k < nb_sequence;k++) { + pstate = seq->int_sequence[k][0]; + for (m = 0;m < length[k];m++) { + seq->real_sequence[k][i][m] = continuous_process[j - 1]->observation[*pstate]->intercept + + continuous_process[j - 1]->observation[*pstate]->slope * index_parameter[k][m]; + pstate++; + } + } + break; + } + } + } + + else if (continuous_process[j - 1]->ident == AUTOREGRESSIVE_MODEL) { + switch (type[i - 1]) { + + case INT_VALUE : { + for (k = 0;k < nb_sequence;k++) { + pstate = seq->int_sequence[k][0]; + for (m = 0;m < length[k];m++) { + if ((m == 0) || (*pstate != *(pstate - 1))) { + seq->real_sequence[k][i][m] = continuous_process[j - 1]->observation[*pstate]->location; + } + else { + seq->real_sequence[k][i][m] = continuous_process[j - 1]->observation[*pstate]->location + + continuous_process[j - 1]->observation[*pstate]->autoregressive_coeff * + (seq->int_sequence[k][i - 1][m - 1] - 
continuous_process[j - 1]->observation[*pstate]->location); + } + pstate++; + } + } + break; + } + + case REAL_VALUE : { + for (k = 0;k < nb_sequence;k++) { + pstate = seq->int_sequence[k][0]; + for (m = 0;m < length[k];m++) { + if ((m == 0) || (*pstate != *(pstate - 1))) { + seq->real_sequence[k][i][m] = continuous_process[j - 1]->observation[*pstate]->location; + } + else { + seq->real_sequence[k][i][m] = continuous_process[j - 1]->observation[*pstate]->location + + continuous_process[j - 1]->observation[*pstate]->autoregressive_coeff * + (seq->real_sequence[k][i - 1][m - 1] - continuous_process[j - 1]->observation[*pstate]->location); + } + pstate++; + } + } + break; + } + } + } + } + } + + for (i = 1;i < seq->nb_variable;i++) { + if (seq->type[i] == AUXILIARY) { + seq->min_value_computation(i); + seq->max_value_computation(i); + } + } + + delete [] auxiliary; + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Building of residual sequences on the basis of restored state sequences. + * + * \param[in] categorical_process pointer on CategoricalSequenceProcess objects, + * \param[in] discrete_process pointer on DiscreteParametricProcess objects, + * \param[in] continuous_process pointer on ContinuousParametricProcess objects. + * + * \return MarkovianSequences object. 
+ */
+/*--------------------------------------------------------------*/
+  // Variable 0 (restored states) is copied as is; every other variable becomes REAL_VALUE and holds observed value minus state-dependent prediction.
+MarkovianSequences* MarkovianSequences::residual_sequences(CategoricalSequenceProcess **categorical_process ,
+                                                           DiscreteParametricProcess **discrete_process ,
+                                                           ContinuousParametricProcess **continuous_process) const
+
+{
+  int i , j , k;
+  int *pstate;
+  double *mean;
+  variable_nature *itype;
+  MarkovianSequences *seq;
+
+
+  itype = new variable_nature[nb_variable];
+  itype[0] = type[0];  // the state variable keeps its type
+  for (i = 1;i < nb_variable;i++) {
+    itype[i] = REAL_VALUE;  // residuals are real-valued
+  }
+
+  seq = new MarkovianSequences(nb_sequence , identifier , length , vertex_identifier ,
+                               index_param_type , nb_variable , itype);
+  delete [] itype;
+
+  // copy of index parameters
+
+  if (index_parameter_distribution) {
+    seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution);
+  }
+  if (index_interval) {
+    seq->index_interval = new FrequencyDistribution(*index_interval);
+  }
+
+  if (index_parameter) {
+    for (i = 0;i < nb_sequence;i++) {
+      for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) {  // one extra entry is copied for POSITION
+        seq->index_parameter[i][j] = index_parameter[i][j];
+      }
+    }
+  }
+
+  // copy of restored state sequences
+
+  for (i = 0;i < nb_sequence;i++) {
+    for (j = 0;j < length[i];j++) {
+      seq->int_sequence[i][0][j] = int_sequence[i][0][j];
+    }
+  }
+
+  seq->min_value[0] = min_value[0];
+  seq->max_value[0] = max_value[0];
+  seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]);  // marginal_distribution[0] is dereferenced without a NULL check
+
+  // computation of residual sequences
+
+  for (i = 1;i < nb_variable;i++) {
+    if ((categorical_process) && (categorical_process[i - 1])) {
+      for (j = 0;j < nb_sequence;j++) {
+        pstate = seq->int_sequence[j][0];
+        for (k = 0;k < length[j];k++) {
+          seq->real_sequence[j][i][k] = int_sequence[j][i][k] - categorical_process[i - 1]->observation[*pstate++]->mean;
+        }
+      }
+    }
+
+    else if ((discrete_process) && (discrete_process[i - 1])) {
+      for (j = 0;j < nb_sequence;j++) {
+        pstate = seq->int_sequence[j][0];
+        for (k = 0;k < length[j];k++) {
+          seq->real_sequence[j][i][k] = int_sequence[j][i][k] - discrete_process[i - 1]->observation[*pstate++]->mean;
+        }
+      }
+    }
+
+    else if ((continuous_process) && (continuous_process[i - 1])) {
+      if ((continuous_process[i - 1]->ident == GAMMA) || (continuous_process[i - 1]->ident == ZERO_INFLATED_GAMMA) ||
+          (continuous_process[i - 1]->ident == GAUSSIAN) || (continuous_process[i - 1]->ident == INVERSE_GAUSSIAN) ||
+          (continuous_process[i - 1]->ident == VON_MISES)) {
+        mean = new double [continuous_process[i - 1]->nb_state];  // one mean per state
+
+        switch (continuous_process[i - 1]->ident) {
+
+        case GAMMA : {
+          for (j = 0;j < continuous_process[i - 1]->nb_state;j++) {
+            mean[j] = continuous_process[i - 1]->observation[j]->shape *
+                      continuous_process[i - 1]->observation[j]->scale;
+          }
+          break;
+        }
+
+        case ZERO_INFLATED_GAMMA : {
+          for (j = 0;j < continuous_process[i - 1]->nb_state;j++) {
+            if (continuous_process[i - 1]->observation[j]->zero_probability == 1.) {
+              mean[j] = 0.;
+            }
+            else {
+              mean[j] = (1 - continuous_process[i - 1]->observation[j]->zero_probability) *
+                        continuous_process[i - 1]->observation[j]->shape *
+                        continuous_process[i - 1]->observation[j]->scale;
+            }
+          }
+          break;
+        }
+
+        default : {
+          for (j = 0;j < continuous_process[i - 1]->nb_state;j++) {
+            mean[j] = continuous_process[i - 1]->observation[j]->location;
+          }
+          break;
+        }
+        }
+
+        switch (type[i]) {  // the observed values are read from the int or real storage
+
+        case INT_VALUE : {
+          for (j = 0;j < nb_sequence;j++) {
+            pstate = seq->int_sequence[j][0];
+            for (k = 0;k < length[j];k++) {
+              seq->real_sequence[j][i][k] = int_sequence[j][i][k] - mean[*pstate++];
+            }
+          }
+          break;
+        }
+
+        case REAL_VALUE : {
+          for (j = 0;j < nb_sequence;j++) {
+            pstate = seq->int_sequence[j][0];
+            for (k = 0;k < length[j];k++) {
+              seq->real_sequence[j][i][k] = real_sequence[j][i][k] - mean[*pstate++];
+            }
+          }
+          break;
+        }
+        }
+
+        delete [] mean;
+      }
+
+      else if (continuous_process[i - 1]->ident == LINEAR_MODEL) {
+        switch (index_param_type) {  // only IMPLICIT_TYPE and TIME are handled
+
+        case IMPLICIT_TYPE : {
+          switch (type[i]) {
+
+          case INT_VALUE : {
+            for (j = 0;j < nb_sequence;j++) {
+              pstate = seq->int_sequence[j][0];
+
+              for (k = 0;k < length[j];k++) {
+                seq->real_sequence[j][i][k] = int_sequence[j][i][k] - (continuous_process[i - 1]->observation[*pstate]->intercept +
+                                               continuous_process[i - 1]->observation[*pstate]->slope * k);  // explanatory value: rank k
+                pstate++;
+              }
+            }
+            break;
+          }
+
+          case REAL_VALUE : {
+            for (j = 0;j < nb_sequence;j++) {
+              pstate = seq->int_sequence[j][0];
+
+              for (k = 0;k < length[j];k++) {
+                seq->real_sequence[j][i][k] = real_sequence[j][i][k] - (continuous_process[i - 1]->observation[*pstate]->intercept +
+                                               continuous_process[i - 1]->observation[*pstate]->slope * k);
+                pstate++;
+              }
+            }
+            break;
+          }
+          }
+          break;
+        }
+
+        case TIME : {
+          switch (type[i]) {
+
+          case INT_VALUE : {
+            for (j = 0;j < nb_sequence;j++) {
+              pstate = seq->int_sequence[j][0];
+
+              for (k = 0;k < length[j];k++) {
+                seq->real_sequence[j][i][k] = int_sequence[j][i][k] - (continuous_process[i - 1]->observation[*pstate]->intercept +
+                                               continuous_process[i - 1]->observation[*pstate]->slope * index_parameter[j][k]);  // explanatory value: index parameter
+                pstate++;
+              }
+            }
+            break;
+          }
+
+          case REAL_VALUE : {
+            for (j = 0;j < nb_sequence;j++) {
+              pstate = seq->int_sequence[j][0];
+
+              for (k = 0;k < length[j];k++) {
+                seq->real_sequence[j][i][k] = real_sequence[j][i][k] - (continuous_process[i - 1]->observation[*pstate]->intercept +
+                                               continuous_process[i - 1]->observation[*pstate]->slope * index_parameter[j][k]);
+                pstate++;
+              }
+            }
+            break;
+          }
+          }
+          break;
+        }
+        }
+      }
+
+      else if (continuous_process[i - 1]->ident == AUTOREGRESSIVE_MODEL) {
+        switch (type[i]) {
+
+        case INT_VALUE : {
+          for (j = 0;j < nb_sequence;j++) {
+            pstate = seq->int_sequence[j][0];
+
+            for (k = 0;k < length[j];k++) {
+              if ((k == 0) || (*pstate != *(pstate - 1))) {  // first position of a state run: no autoregressive term
+                seq->real_sequence[j][i][k] = int_sequence[j][i][k] - continuous_process[i - 1]->observation[*pstate]->location;
+              }
+              else {
+                seq->real_sequence[j][i][k] = int_sequence[j][i][k] - (continuous_process[i - 1]->observation[*pstate]->location +
+                                               continuous_process[i - 1]->observation[*pstate]->autoregressive_coeff *
+                                               (int_sequence[j][i][k - 1] - continuous_process[i - 1]->observation[*pstate]->location));
+              }
+              pstate++;
+            }
+          }
+          break;
+        }
+
+        case REAL_VALUE : {
+          for (j = 0;j < nb_sequence;j++) {
+            pstate = seq->int_sequence[j][0];
+
+            for (k = 0;k < length[j];k++) {
+              if ((k == 0) || (*pstate != *(pstate - 1))) {  // first position of a state run: no autoregressive term
+                seq->real_sequence[j][i][k] = real_sequence[j][i][k] - continuous_process[i - 1]->observation[*pstate]->location;
+              }
+              else {
+                seq->real_sequence[j][i][k] = real_sequence[j][i][k] - (continuous_process[i - 1]->observation[*pstate]->location +
+                                               continuous_process[i - 1]->observation[*pstate]->autoregressive_coeff *
+                                               (real_sequence[j][i][k - 1] - continuous_process[i - 1]->observation[*pstate]->location));
+              }
+              pstate++;
+            }
+          }
+          break;
+        }
+        }
+      }
+    }
+  }
+
+  for (i = 1;i < seq->nb_variable;i++) {
+    seq->min_value_computation(i);
+    seq->max_value_computation(i);
+
+    seq->build_marginal_histogram(i);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Split of sequences in equal length segments.
+ *
+ *  Each sequence is cut into consecutive segments of length step; a shorter
+ *  last segment holds the remainder when the length is not a multiple of step.
+ *  NULL is returned (and error updated) when step is outside [1, max_length].
+ *
+ *  \param[in] error reference on a StatError object,
+ *  \param[in] step  sequence lengths.
+ *
+ *  \return          MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* MarkovianSequences::split(StatError &error , int step) const
+
+{
+  int i , j , k , m;
+  int inb_sequence , last_length , nb_segment , *ilength , *pindex_param , *cindex_param ,
+      *pisequence , *cisequence;
+  double *prsequence , *crsequence;
+  MarkovianSequences *seq;
+
+
+  error.init();
+
+  if ((step < 1) || (step > max_length)) {
+    seq = NULL;
+    error.update(SEQ_error[SEQR_SEQUENCE_LENGTH]);
+  }
+
+  else {
+    ilength = new int[cumul_length / step + nb_sequence];  // upper bound on the number of segments
+
+    inb_sequence = 0;
+    for (i = 0;i < nb_sequence;i++) {
+      for (j = 0;j < length[i] / step;j++) {
+        ilength[inb_sequence++] = step;
+      }
+      last_length = length[i] % step;
+      if (last_length > 0) {
+        ilength[inb_sequence++] = last_length;  // remainder segment
+      }
+    }
+
+#   ifdef DEBUG
+    cout << "\nTEST: " << inb_sequence << " | " << cumul_length / step + nb_sequence << endl;
+#   endif
+
+    seq = new MarkovianSequences(inb_sequence , NULL , ilength , vertex_identifier ,  // NOTE(review): vertex_identifier is laid out for the unsplit sequences -- confirm how the constructor uses it
+                                 index_param_type , nb_variable , type);
+    delete [] ilength;
+
+    // copy of sequences
+
+    inb_sequence = 0;
+    for (i = 0;i < nb_sequence;i++) {
+      nb_segment = (length[i] % step == 0 ? length[i] / step : length[i] / step + 1);
+
+      if (seq->index_param_type == TIME) {  // NOTE(review): index parameters are copied for TIME only -- confirm other index parameter types need none
+        cindex_param = index_parameter[i];
+        for (j = 0;j < nb_segment;j++) {
+          pindex_param = seq->index_parameter[inb_sequence + j];
+          for (k = 0;k < seq->length[inb_sequence + j];k++) {
+            *pindex_param++ = *cindex_param++;
+          }
+        }
+      }
+
+      for (j = 0;j < seq->nb_variable;j++) {
+        if (seq->type[j] != REAL_VALUE) {
+          cisequence = int_sequence[i][j];  // read cursor, runs across segment boundaries
+          for (k = 0;k < nb_segment;k++) {
+            pisequence = seq->int_sequence[inb_sequence + k][j];
+            for (m = 0;m < seq->length[inb_sequence + k];m++) {
+              *pisequence++ = *cisequence++;
+            }
+          }
+        }
+
+        else {
+          crsequence = real_sequence[i][j];  // read cursor, runs across segment boundaries
+          for (k = 0;k < nb_segment;k++) {
+            prsequence = seq->real_sequence[inb_sequence + k][j];
+            for (m = 0;m < seq->length[inb_sequence + k];m++) {
+              *prsequence++ = *crsequence++;
+            }
+          }
+        }
+      }
+
+      inb_sequence += nb_segment;
+    }
+
+    if (seq->index_param_type == TIME) {
+      seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution);
+      seq->index_interval_computation();
+    }
+
+    for (i = 0;i < seq->nb_variable;i++) {
+      seq->min_value[i] = min_value[i];  // the values themselves are unchanged by the split
+      seq->max_value[i] = max_value[i];
+
+      if (marginal_distribution[i]) {
+        seq->marginal_distribution[i] = new FrequencyDistribution(*marginal_distribution[i]);
+      }
+      if (marginal_histogram[i]) {
+        seq->marginal_histogram[i] = new Histogram(*marginal_histogram[i]);
+      }
+
+      seq->min_interval_computation(i);
+    }
+
+    seq->build_characteristic();
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the cumulative frequency distribution function for a variable.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] cdf      (value, cumulative distribution function).
+ *
+ *  \return             number of values.
+ */
+/*--------------------------------------------------------------*/
+  // Delegates to the marginal frequency distribution when it exists; otherwise allocates cdf[0] / cdf[1] (cumul_length entries each) and fills them by repeated scans of the data.
+int MarkovianSequences::cumulative_distribution_function_computation(int variable , double **cdf) const
+
+{
+  int i , j , k;
+  int cumul , int_min , int_value , frequency;
+  double real_min , real_value;
+
+
+  if (marginal_distribution[variable]) {
+    i = marginal_distribution[variable]->cumulative_distribution_function_computation(cdf);
+  }
+
+  else {
+    cdf[0] = new double[cumul_length];  // distinct values, in increasing order
+    cdf[1] = new double[cumul_length];  // corresponding cumulative function
+
+    cumul = 0;  // number of observations at values already processed
+    i = 0;  // number of distinct values found so far
+
+    switch (type[variable]) {  // only INT_VALUE and REAL_VALUE are handled
+
+    case INT_VALUE : {
+      do {
+
+        // search for the current minimum value
+
+        if (cumul == 0) {
+          int_value = (int)min_value[variable];
+        }
+
+        else {
+          int_min = (int)max_value[variable] + 1;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = 0;k < length[j];k++) {
+              if ((int_sequence[j][variable][k] > int_value) &&
+                  (int_sequence[j][variable][k] < int_min)) {
+                int_min = int_sequence[j][variable][k];
+              }
+            }
+          }
+          int_value = int_min;  // smallest value strictly above the previous one
+        }
+
+        // determination of the number of vectors taken the current minimum value
+        // for the selected variable
+
+        frequency = 0;
+        for (j = 0;j < nb_sequence;j++) {
+          for (k = 0;k < length[j];k++) {
+            if (int_sequence[j][variable][k] == int_value) {
+              frequency++;
+            }
+          }
+        }
+
+        cdf[0][i] = int_value;
+        cdf[1][i] = (cumul + (double)(frequency + 1) / 2.) / (double)cumul_length;
+        cumul += frequency;
+        i++;
+      }
+      while (cumul < cumul_length);  // stops once every observation has been counted
+      break;
+    }
+
+    case REAL_VALUE : {
+      do {
+
+        // search for the current minimum value
+
+        if (cumul == 0) {
+          real_value = min_value[variable];
+        }
+
+        else {
+          real_min = max_value[variable] + 1;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = 0;k < length[j];k++) {
+              if ((real_sequence[j][variable][k] > real_value) &&
+                  (real_sequence[j][variable][k] < real_min)) {
+                real_min = real_sequence[j][variable][k];
+              }
+            }
+          }
+          real_value = real_min;  // smallest value strictly above the previous one
+        }
+
+        // determination of the number of vectors taken the current minimum value
+        // for the selected variable
+
+        frequency = 0;
+        for (j = 0;j < nb_sequence;j++) {
+          for (k = 0;k < length[j];k++) {
+            if (real_sequence[j][variable][k] == real_value) {
+              frequency++;
+            }
+          }
+        }
+
+        cdf[0][i] = real_value;
+        cdf[1][i] = (cumul + (double)(frequency + 1) / 2.) / (double)cumul_length;
+        cumul += frequency;
+        i++;
+      }
+      while (cumul < cumul_length);  // stops once every observation has been counted
+      break;
+    }
+    }
+  }
+
+# ifdef DEBUG
+  cout << "\nCumul: ";
+  for (j = 0;j < i;j++) {
+    cout << cdf[0][j] << " " << cdf[1][j] << " | ";
+  }
+  cout << endl;
+# endif
+
+  return i;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the cumulative frequency distribution function for a variable.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] state    state,
+ *  \param[in] cdf      (value, cumulative distribution function).
+ *
+ *  \return             number of values.
+ */
+/*--------------------------------------------------------------*/
+  // State-conditional variant: only positions whose variable 0 equals state are counted; the sample size is marginal_distribution[0]->frequency[state].
+int MarkovianSequences::cumulative_distribution_function_computation(int variable , int state ,
+                                                                     double **cdf) const
+
+{
+  int i , j , k;
+  int cumul , int_min , int_value , frequency;
+  double real_min , real_value;
+
+
+  if (observation_distribution[variable]) {  // observation_distribution itself is not checked against NULL here
+    i = observation_distribution[variable][state]->cumulative_distribution_function_computation(cdf);
+  }
+
+  else {
+    cdf[0] = new double[marginal_distribution[0]->frequency[state]];  // distinct values, in increasing order
+    cdf[1] = new double[marginal_distribution[0]->frequency[state]];  // corresponding cumulative function
+
+    cumul = 0;  // number of observations at values already processed
+    i = 0;  // number of distinct values found so far
+
+    switch (type[variable]) {  // only INT_VALUE and REAL_VALUE are handled
+
+    case INT_VALUE : {
+      do {
+
+        // search for the current minimum value
+
+        if (cumul == 0) {
+          int_value = (int)min_value[variable];  // overall minimum; it may not occur in this state (frequency is then 0 for this entry)
+        }
+
+        else {
+          int_min = (int)max_value[variable] + 1;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = 0;k < length[j];k++) {
+              if ((int_sequence[j][0][k] == state) &&
+                  (int_sequence[j][variable][k] > int_value) &&
+                  (int_sequence[j][variable][k] < int_min)) {
+                int_min = int_sequence[j][variable][k];
+              }
+            }
+          }
+          int_value = int_min;
+        }
+
+        // determination of the number of vectors taken the current minimum value
+        // for the selected variable
+
+        frequency = 0;
+        for (j = 0;j < nb_sequence;j++) {
+          for (k = 0;k < length[j];k++) {
+            if ((int_sequence[j][0][k] == state) &&
+                (int_sequence[j][variable][k] == int_value)) {
+              frequency++;
+            }
+          }
+        }
+
+        cdf[0][i] = int_value;
+        cdf[1][i] = (cumul + (double)(frequency + 1) / 2.) /
+                    (double)marginal_distribution[0]->frequency[state];
+        cumul += frequency;
+        i++;
+      }
+      while (cumul < marginal_distribution[0]->frequency[state]);  // stops once every observation of the state has been counted
+      break;
+    }
+
+    case REAL_VALUE : {
+      do {
+
+        // search for the current minimum value
+
+        if (cumul == 0) {
+          real_value = min_value[variable];  // overall minimum; it may not occur in this state (frequency is then 0 for this entry)
+        }
+
+        else {
+          real_min = max_value[variable] + 1;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = 0;k < length[j];k++) {
+              if ((int_sequence[j][0][k] == state) &&
+                  (real_sequence[j][variable][k] > real_value) &&
+                  (real_sequence[j][variable][k] < real_min)) {
+                real_min = real_sequence[j][variable][k];
+              }
+            }
+          }
+          real_value = real_min;
+        }
+
+        // determination of the number of vectors taken the current minimum value
+        // for the selected variable
+
+        frequency = 0;
+        for (j = 0;j < nb_sequence;j++) {
+          for (k = 0;k < length[j];k++) {
+            if ((int_sequence[j][0][k] == state) &&
+                (real_sequence[j][variable][k] == real_value)) {
+              frequency++;
+            }
+          }
+        }
+
+        cdf[0][i] = real_value;
+        cdf[1][i] = (cumul + (double)(frequency + 1) / 2.) /
+                    (double)marginal_distribution[0]->frequency[state];
+        cumul += frequency;
+        i++;
+      }
+      while (cumul < marginal_distribution[0]->frequency[state]);  // stops once every observation of the state has been counted
+      break;
+    }
+    }
+  }
+
+  return i;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the minimum interval between 2 values for a variable.
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+  // Result is stored in min_interval[variable]; taken from the marginal frequency distribution when it exists, otherwise found by scanning the distinct values in increasing order.
+void MarkovianSequences::min_interval_computation(int variable)
+
+{
+  if (marginal_distribution[variable]) {
+    min_interval[variable] = marginal_distribution[variable]->min_interval_computation();
+  }
+
+  else if (type[variable] != AUXILIARY) {
+    bool *selected_value = NULL;
+    int i , j , k , m;
+    int int_min , int_value , nb_value , max_frequency , *frequency = NULL , *index = NULL;
+    double real_min , real_value;
+
+
+    min_interval[variable] = max_value[variable] - min_value[variable];  // initial upper bound: full range
+    i = 0;  // number of observations at values already processed
+
+    switch (type[variable]) {
+
+    case INT_VALUE : {
+      do {
+
+        // search for the current minimum value
+
+        if (i == 0) {
+          int_value = (int)min_value[variable];
+        }
+
+        else {
+          int_min = (int)max_value[variable] + 1;
+          for (j = 0;j < nb_sequence;j++) {
+            for (k = 0;k < length[j];k++) {
+              if ((int_sequence[j][variable][k] > int_value) &&
+                  (int_sequence[j][variable][k] < int_min)) {
+                int_min = int_sequence[j][variable][k];
+              }
+            }
+          }
+
+          if (int_min - int_value < min_interval[variable]) {
+            min_interval[variable] = int_min - int_value;  // gap between two successive distinct values
+          }
+          int_value = int_min;
+        }
+
+        // determination of the number of vectors taken the current minimum value
+        // for the selected variable
+
+        for (j = 0;j < nb_sequence;j++) {
+          for (k = 0;k < length[j];k++) {
+            if (int_sequence[j][variable][k] == int_value) {
+              i++;
+            }
+          }
+        }
+      }
+      while (i < cumul_length);
+      break;
+    }
+
+    case REAL_VALUE : {
+//      double max_interval = 0.;
+
+      frequency = new int[cumul_length];  // frequency of each distinct value, in increasing value order
+
+      j = 0;  // number of distinct values found so far
+
+      do {
+
+        // search for the current minimum value
+
+        if (i == 0) {
+          real_value = min_value[variable];
+        }
+
+        else {
+          real_min = max_value[variable] + 1;
+          for (k = 0;k < nb_sequence;k++) {
+            for (m = 0;m < length[k];m++) {
+              if ((real_sequence[k][variable][m] > real_value) &&
+                  (real_sequence[k][variable][m] < real_min)) {
+                real_min = real_sequence[k][variable][m];
+              }
+            }
+          }
+
+          if (real_min - real_value < min_interval[variable]) {
+            min_interval[variable] = real_min - real_value;  // gap between two successive distinct values
+          }
+/*          if (real_min - real_value > max_interval) {
+            max_interval = real_min - real_value;
+          } */
+          real_value = real_min;
+        }
+
+        // determination of the number of vectors taken the current minimum value
+        // for the selected variable
+
+        frequency[j] = 0;
+        for (k = 0;k < nb_sequence;k++) {
+          for (m = 0;m < length[k];m++) {
+            if (real_sequence[k][variable][m] == real_value) {
+              i++;
+              frequency[j]++;
+            }
+          }
+        }
+        j++;
+      }
+      while (i < cumul_length);
+
+      // search for the median frequency
+
+      nb_value = j;
+      selected_value = new bool[nb_value];
+      index = new int [nb_value];  // value ranks ordered by decreasing frequency
+
+      for (i = 0;i < nb_value;i++) {
+        selected_value[i] = false;
+      }
+
+      i = 0;
+      do {
+        max_frequency = 0;
+        for (j = 0;j < nb_value;j++) {
+          if ((!selected_value[j]) && (frequency[j] > max_frequency)) {
+            max_frequency = frequency[j];
+          }
+        }
+
+        for (j = 0;j < nb_value;j++) {
+          if (frequency[j] == max_frequency) {  // all values sharing the current maximum frequency are selected together
+            selected_value[j] = true;
+            index[i++] = j;
+          }
+        }
+      }
+      while (i < nb_value);
+
+#     ifdef DEBUG
+      cout << "\n" << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": "
+           << min_interval[variable] << " " << frequency[index[nb_value / 2]];
+#     endif
+
+      // to be finalized
+
+      if (frequency[index[nb_value / 2]] == 1) {
+        min_interval[variable] = 0.;  // reset when the median frequency is 1
+      }
+
+#     ifdef DEBUG
+      cout << " | " << nb_value << " " << cumul_length << endl;
+#     endif
+
+      delete [] frequency;
+      delete [] selected_value;
+      delete [] index;
+      break;
+    }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the information quantity assuming i.i.d. variables.
+ *
+ *  Sums information_computation() over the marginal frequency distributions,
+ *  skipping variable 0 when it is of type STATE and is not the only variable.
+ *
+ *  \return information quantity, or D_INF as soon as a variable has
+ *          no marginal frequency distribution.
+ */
+/*--------------------------------------------------------------*/
+
+double MarkovianSequences::iid_information_computation() const
+
+{
+  int i;
+  double information = 0.;
+
+
+  for (i = (((type[0] != STATE) || (nb_variable == 1)) ? 0 : 1);i < nb_variable;i++) {
+    if (marginal_distribution[i]) {
+      information += marginal_distribution[i]->information_computation();
+    }
+    else {
+      information = D_INF;
+      break;
+    }
+  }
+
+  return information;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of samples corresponding to the changes in self-transition probability
+ *         for a state of a non-homogeneous Markov chain.
+ *
+ *  For each index i < max_length - 1, stores in self_transition[state] the
+ *  number of sequences in the state at index i (frequency) and the proportion
+ *  of them still in the state at index i + 1 (point[0]), D_DEFAULT when the
+ *  count is 0. self_transition[state] must already be allocated.
+ *
+ *  \param[in] state state.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::self_transition_computation(int state)
+
+{
+  int i , j;
+  int num , denom;
+
+
+  for (i = 0;i < max_length - 1;i++) {
+    num = 0;  // self-transitions observed at index i
+    denom = 0;  // occurrences of the state at index i having a successor
+
+    for (j = 0;j < nb_sequence;j++) {
+      if (i < length[j] - 1) {
+        if (int_sequence[j][0][i] == state) {
+          if (int_sequence[j][0][i + 1] == state) {
+            num++;
+          }
+          denom++;
+        }
+      }
+    }
+
+    self_transition[state]->frequency[i] = denom;
+    if (denom > 0) {
+      self_transition[state]->point[0][i] = (double)num / (double)denom;
+    }
+    else {
+      self_transition[state]->point[0][i] = D_DEFAULT;
+    }
+  }
+
+# ifdef DEBUG
+  double sum = 0.;
+
+  for (i = 0;i < max_length - 1;i++) {
+    sum += self_transition[state]->frequency[i] * self_transition[state]->point[0][i];
+  }
+
+  cout << "\naverage self-transition count: "
+       << sum << endl;
+
+  // cout << "\naverage self-transition probability : "
+  //      << sum / self_transition[state]->nb_element_computation() << endl;
+# endif
+
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of samples corresponding to the changes in self-transition probability
+ *         for states of a non-homogeneous Markov chain.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::self_transition_computation()
+
+{
+  // nothing to do when the samples have already been extracted
+  if (self_transition) {
+    return;
+  }
+
+  int state;
+
+
+  state_variable_init();
+# ifdef DEBUG
+  assert(self_transition == NULL);
+# endif
+
+  // one SelfTransition object per state, each covering the max_length - 1 possible transitions
+
+  self_transition = new SelfTransition*[marginal_distribution[0]->nb_value];
+
+  for (state = 0;state < marginal_distribution[0]->nb_value;state++) {
+    self_transition[state] = new SelfTransition(max_length - 1);
+    self_transition_computation(state);
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of samples corresponding to the changes in self-transition probability
+ *         for states of a non-homogeneous Markov chain.
+ *
+ *         On the first call the array of SelfTransition objects is built, with a NULL entry
+ *         for each homogeneous state. On subsequent calls the samples of the non-homogeneous
+ *         states are recomputed; an entry left NULL by a previous call (state formerly
+ *         flagged as homogeneous) is allocated before being filled.
+ *
+ *  \param[in] homogeneity state homogeneities (true: homogeneous state, no sample extracted).
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::self_transition_computation(bool *homogeneity)
+
+{
+  int i;
+
+  if (self_transition == NULL) {
+
+    state_variable_init();
+    self_transition = new SelfTransition*[marginal_distribution[0]->nb_value];
+
+    for (i = 0;i < marginal_distribution[0]->nb_value;i++) {
+      if (homogeneity[i]) {
+        self_transition[i] = NULL;
+      }
+      else {
+        self_transition[i] = new SelfTransition(max_length - 1);
+        self_transition_computation(i);
+      }
+    }
+  }
+
+  else {
+    for (i = 0;i < marginal_distribution[0]->nb_value;i++) {
+      if (!homogeneity[i]) {
+
+        // self_transition_computation(i) dereferences self_transition[i]:
+        // the entry must exist even if the state was homogeneous at construction time
+
+        if (self_transition[i] == NULL) {
+          self_transition[i] = new SelfTransition(max_length - 1);
+        }
+        self_transition_computation(i);
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the marginal state frequency distribution from
+ *         the restored state sequences.
+ *
+ *  \return Distribution object.
+ */
+/*--------------------------------------------------------------*/
+
+Distribution* MarkovianSequences::weight_computation() const
+
+{
+  // weights are only defined when the first variable is a state variable
+  if (type[0] != STATE) {
+    return NULL;
+  }
+
+  int state;
+  const int nb_state = marginal_distribution[0]->nb_value;
+  const double nb_element = (double)marginal_distribution[0]->nb_element;
+  Distribution *weight = new Distribution(nb_state);
+
+
+  // relative frequency of each state
+
+  for (state = 0;state < nb_state;state++) {
+    weight->mass[state] = (double)marginal_distribution[0]->frequency[state] / nb_element;
+  }
+
+  weight->cumul_computation();
+  weight->max = (double)marginal_distribution[0]->max / nb_element;
+
+  return weight;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Recomputation of the observation frequency distributions of a variable
+ *         from the state variable (variable 0) and the observed values.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] nb_state number of states.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::observation_frequency_distribution_computation(int variable ,
+                                                                        int nb_state)
+
+{
+  int state , value , seq , pos;
+  FrequencyDistribution **observation = observation_distribution[variable];
+
+
+  // reset of the frequencies
+
+  for (state = 0;state < nb_state;state++) {
+    for (value = 0;value < marginal_distribution[variable]->nb_value;value++) {
+      observation[state]->frequency[value] = 0;
+    }
+  }
+
+  // counting of the (state, output) pairs
+
+  for (seq = 0;seq < nb_sequence;seq++) {
+    for (pos = 0;pos < length[seq];pos++) {
+      (observation[int_sequence[seq][0][pos]]->frequency[int_sequence[seq][variable][pos]])++;
+    }
+  }
+
+  // characteristics of the frequency distributions; the support, mean and variance
+  // are only computed when no characteristics are attached to the variable
+
+  const bool no_characteristics = (characteristics[variable] ? false : true);
+
+  for (state = 0;state < nb_state;state++) {
+    if (no_characteristics) {
+      observation[state]->nb_value_computation();
+    }
+    observation[state]->offset_computation();
+    observation[state]->nb_element_computation();
+    observation[state]->max_computation();
+
+    if (no_characteristics) {
+      observation[state]->mean_computation();
+      observation[state]->variance_computation();
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the observation frequency distributions for every
+ *         variable having a marginal frequency distribution.
+ *
+ *  \param[in] nb_state number of states.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::build_observation_frequency_distribution(int nb_state)
+
+{
+  // nothing to build without output variables or when already built
+  if ((nb_variable <= 1) || (observation_distribution)) {
+    return;
+  }
+
+  int var , state;
+
+
+  observation_distribution = new FrequencyDistribution**[nb_variable];
+  observation_distribution[0] = NULL;    // variable 0 is the state variable
+
+  for (var = 1;var < nb_variable;var++) {
+    if (!marginal_distribution[var]) {
+      observation_distribution[var] = NULL;
+      continue;
+    }
+
+    observation_distribution[var] = new FrequencyDistribution*[nb_state];
+    for (state = 0;state < nb_state;state++) {
+      observation_distribution[var][state] = new FrequencyDistribution(marginal_distribution[var]->nb_value);
+    }
+
+    observation_frequency_distribution_computation(var , nb_state);
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the observation histograms for a variable.
+ *
+ *  \param[in] variable  variable index,
+ *  \param[in] nb_state  number of states,
+ *  \param[in] bin_width bin width.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::build_observation_histogram(int variable , int nb_state , double bin_width)
+
+{
+  // histograms already built with the requested bin width: nothing to do
+  if ((observation_histogram[variable]) && (bin_width == observation_histogram[variable][0]->bin_width)) {
+    return;
+  }
+
+  int state , bin , seq , pos , nb_bin;
+  double imin_value;
+
+
+  // bin layout: default bin width taken from the marginal histogram,
+  // lower bound aligned on a multiple of the bin width
+
+  if (bin_width == D_DEFAULT) {
+    bin_width = marginal_histogram[variable]->bin_width;
+  }
+  imin_value = floor(min_value[variable] / bin_width) * bin_width;
+  nb_bin = (int)ceil((max_value[variable] - imin_value) / bin_width) + 1;
+
+  if (observation_histogram[variable]) {
+
+    // existing histograms: only the frequency arrays are reallocated
+
+    for (state = 0;state < nb_state;state++) {
+      observation_histogram[variable][state]->nb_bin = nb_bin;
+
+      delete [] observation_histogram[variable][state]->frequency;
+      observation_histogram[variable][state]->frequency = new int[observation_histogram[variable][state]->nb_bin];
+    }
+  }
+
+  else {
+    observation_histogram[variable] = new Histogram*[nb_state];
+
+    for (state = 0;state < nb_state;state++) {
+      observation_histogram[variable][state] = new Histogram(nb_bin , false);
+
+      observation_histogram[variable][state]->nb_element = marginal_distribution[0]->frequency[state];
+      observation_histogram[variable][state]->type = type[variable];
+    }
+  }
+
+  Histogram **histogram = observation_histogram[variable];
+
+  // common bin layout and reset of the bin frequencies
+
+  for (state = 0;state < nb_state;state++) {
+    histogram[state]->bin_width = bin_width;
+    histogram[state]->min_value = imin_value;
+    histogram[state]->max_value = ceil(max_value[variable] / bin_width) * bin_width;
+  }
+
+  for (state = 0;state < nb_state;state++) {
+    for (bin = 0;bin < histogram[state]->nb_bin;bin++) {
+      histogram[state]->frequency[bin] = 0;
+    }
+  }
+
+  // update of the histogram bin frequencies
+
+  if (type[variable] == INT_VALUE) {
+    for (seq = 0;seq < nb_sequence;seq++) {
+      for (pos = 0;pos < length[seq];pos++) {
+        (histogram[int_sequence[seq][0][pos]]->frequency[(int)floor((int_sequence[seq][variable][pos] - imin_value) / bin_width)])++;
+      }
+    }
+  }
+
+  else if (type[variable] == REAL_VALUE) {
+    for (seq = 0;seq < nb_sequence;seq++) {
+      for (pos = 0;pos < length[seq];pos++) {
+        const int current_state = int_sequence[seq][0][pos];
+        const double output = real_sequence[seq][variable][pos];
+
+#       ifdef DEBUG
+        assert(variable < nb_variable);
+        assert(current_state < nb_state);
+        assert(histogram[current_state]->frequency != NULL);
+        if ((int)floor((output - imin_value) / bin_width) >= histogram[current_state]->nb_bin)
+          cout << "Inconsistency sequence " << seq << " variable " << variable << " value " << output << " state "
+               << current_state << " bin " << (output - imin_value) / bin_width << " max bin " << histogram[current_state]->nb_bin
+               << " max_value[variable] " << max_value[variable] << endl;
+        assert((int)floor((output - imin_value) / bin_width) < histogram[current_state]->nb_bin);
+#       endif
+
+        (histogram[current_state]->frequency[(int)floor((output - imin_value) / bin_width)])++;
+      }
+    }
+  }
+
+  for (state = 0;state < nb_state;state++) {
+    histogram[state]->max_computation();
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the observation histograms for every variable
+ *         having a marginal histogram.
+ *
+ *  \param[in] nb_state number of states.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::build_observation_histogram(int nb_state)
+
+{
+  // nothing to build without output variables or when already built
+  if ((nb_variable <= 1) || (observation_histogram)) {
+    return;
+  }
+
+  int var;
+
+
+  observation_histogram = new Histogram**[nb_variable];
+  observation_histogram[0] = NULL;    // variable 0 is the state variable
+
+  for (var = 1;var < nb_variable;var++) {
+    observation_histogram[var] = NULL;
+    if (marginal_histogram[var]) {
+      build_observation_histogram(var , nb_state , marginal_histogram[var]->bin_width);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Change of the bin width of the marginal histogram and of
+ *         the observation histograms for a variable.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] variable   variable index,
+ *  \param[in] bin_width  bin width,
+ *  \param[in] imin_value minimum value.
+ *
+ *  \return               error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool MarkovianSequences::select_bin_width(StatError &error , int variable ,
+                                          double bin_width , double imin_value)
+
+{
+  bool status = true;
+
+
+  error.init();
+
+  // the variable index is 1-based on input
+
+  if ((variable < 1) || (variable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    variable--;
+
+    // bin width and minimum value must be integers unless the variable is real-valued or auxiliary
+    const bool integer_required = ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY));
+
+    if (!marginal_histogram[variable]) {
+      status = false;
+      error.update(STAT_error[STATR_MARGINAL_HISTOGRAM]);
+    }
+
+    if ((bin_width <= 0.) || ((integer_required) && ((int)bin_width != bin_width))) {
+      status = false;
+      error.update(STAT_error[STATR_HISTOGRAM_BIN_WIDTH]);
+    }
+
+    // an explicit minimum value must lie in ]min_value - bin_width , min_value]
+
+    if (imin_value != D_INF) {
+      if ((imin_value <= min_value[variable] - bin_width) || (imin_value > min_value[variable]) ||
+          ((integer_required) && ((int)imin_value != imin_value))) {
+        status = false;
+        error.update(STAT_error[STATR_HISTOGRAM_MIN_VALUE]);
+      }
+    }
+  }
+
+  if (!status) {
+    return false;
+  }
+
+  build_marginal_histogram(variable , bin_width , imin_value);
+
+  // the observation histograms, when they exist, follow the marginal histogram
+
+  if ((observation_histogram) && (observation_histogram[variable])) {
+    build_observation_histogram(variable , marginal_distribution[0]->nb_value , bin_width);
+  }
+
+  return true;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Test of the overlap of values observed in the different states.
+ *
+ *  \param[in] variable variable index.
+ *
+ *  \return             state variable hidden or not.
+ */
+/*--------------------------------------------------------------*/
+
+bool MarkovianSequences::test_hidden(int variable) const
+
+{
+  // the test is only meaningful for an integer-valued output variable;
+  // in every other case the state variable is reported as hidden
+  if ((variable <= 0) || (type[variable] != INT_VALUE)) {
+    return true;
+  }
+
+  bool hidden = false , **occurrence;
+  int state , value , seq , pos , nb_occurrence;
+  const int nb_state = marginal_distribution[0]->nb_value;
+  const int nb_output = marginal_distribution[variable]->nb_value;
+
+
+  // occurrence[state][value]: value observed at least once in state
+
+  occurrence = new bool*[nb_state];
+  for (state = 0;state < nb_state;state++) {
+    occurrence[state] = new bool[nb_output];
+    for (value = 0;value < nb_output;value++) {
+      occurrence[state][value] = false;
+    }
+  }
+
+  for (seq = 0;seq < nb_sequence;seq++) {
+    for (pos = 0;pos < length[seq];pos++) {
+      occurrence[int_sequence[seq][0][pos]][int_sequence[seq][variable][pos]] = true;
+    }
+  }
+
+  // the state variable is hidden as soon as one value is observed in more than one state
+
+  for (value = 0;(value < nb_output) && (!hidden);value++) {
+    nb_occurrence = 0;
+    for (state = 0;state < nb_state;state++) {
+      if (occurrence[state][value]) {
+        nb_occurrence++;
+      }
+    }
+
+    hidden = (nb_occurrence > 1);
+  }
+
+  for (state = 0;state < nb_state;state++) {
+    delete [] occurrence[state];
+  }
+  delete [] occurrence;
+
+  return hidden;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of the probabilities of each value as a function of
+ *         the index for a variable.
+ *
+ *  \param[in] variable variable index.
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::build_index_value(int variable) + +{ + int i , j; + int total , *frequency; + + + // construction of a Curves object + + characteristics[variable]->index_value = new Curves(marginal_distribution[variable]->nb_value , + max_length , true , false , false); + frequency = new int[marginal_distribution[variable]->nb_value]; + + // computation of the probabilities of each value as a function of the index parameter + + for (i = 0;i < max_length;i++) { + for (j = 0;j < marginal_distribution[variable]->nb_value;j++) { + frequency[j] = 0; + } + + for (j = 0;j < nb_sequence;j++) { + if (i < length[j]) { + frequency[int_sequence[j][variable][i]]++; + } + } + + total = 0; + for (j = 0;j < marginal_distribution[variable]->nb_value;j++) { + total += frequency[j]; + } + characteristics[variable]->index_value->frequency[i] = total; + for (j = 0;j < marginal_distribution[variable]->nb_value;j++) { + characteristics[variable]->index_value->point[j][i] = (double)frequency[j] / (double)total; + } + } + + delete [] frequency; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the probabilities of each value as a function of + * the explicit index for a variable. + * + * \param[in] variable variable index. 
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::build_explicit_index_value(int variable) + +{ + int i , j , k , m; + int total , *frequency , *index; + + + // construction of a Curves object + + i = 0; + for (j = index_parameter_distribution->offset;j < index_parameter_distribution->nb_value;j++) { + if (index_parameter_distribution->frequency[j] > 0) { + i++; + } + } + characteristics[variable]->explicit_index_value = new Curves(marginal_distribution[variable]->nb_value , + i , true , true , false); + + frequency = new int[marginal_distribution[variable]->nb_value]; + index = new int[nb_sequence]; + + // computation of the probabilities of each value as a function of the explicit index parameter + + for (i = 0;i < nb_sequence;i++) { + index[i] = 0; + } + + i = 0; + for (j = index_parameter_distribution->offset;j < index_parameter_distribution->nb_value;j++) { + if (index_parameter_distribution->frequency[j] > 0) { + characteristics[variable]->explicit_index_value->index_parameter[i] = j; + + for (k = 0;k < marginal_distribution[variable]->nb_value;k++) { + frequency[k] = 0; + } + + for (k = 0;k < nb_sequence;k++) { + m = 0; + m = index[k]; + while ((m < length[k] - 1) && (index_parameter[k][m] < j)) { + m++; + } + if (index_parameter[k][m] == j) { + index[k] = m; + frequency[int_sequence[k][variable][m]]++; + } + } + + total = 0; + for (k = 0;k < marginal_distribution[variable]->nb_value;k++) { + total += frequency[k]; + } + characteristics[variable]->explicit_index_value->frequency[i] = total; + for (k = 0;k < marginal_distribution[variable]->nb_value;k++) { + characteristics[variable]->explicit_index_value->point[k][i] = (double)frequency[k] / (double)total; + } + i++; + } + } + + delete [] frequency; + delete [] index; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the frequency distributions of the times to the 1st occurrence of + * each value of an 
integer variable.
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::build_first_occurrence_frequency_distribution(int variable)
+
+{
+  bool *seen;
+  int value , seq , pos , nb_seen;
+  const int nb_value = marginal_distribution[variable]->nb_value;
+  FrequencyDistribution **first_occurrence;
+
+
+  // working frequency distributions, allocated with the largest possible support
+
+  first_occurrence = new FrequencyDistribution*[nb_value];
+  for (value = 0;value < nb_value;value++) {
+    first_occurrence[value] = new FrequencyDistribution(max_length);
+  }
+
+  // for each sequence, index at which each value is observed for the first time
+
+  seen = new bool[nb_value];
+
+  for (seq = 0;seq < nb_sequence;seq++) {
+    for (value = 0;value < nb_value;value++) {
+      seen[value] = false;
+    }
+
+    nb_seen = 0;
+    for (pos = 0;pos < length[seq];pos++) {
+      const int current = int_sequence[seq][variable][pos];
+
+      if (seen[current]) {
+        continue;
+      }
+
+      seen[current] = true;
+      (first_occurrence[current]->frequency[pos])++;
+
+      // every value has been met: the rest of the sequence brings nothing
+
+      if (++nb_seen == nb_value) {
+        break;
+      }
+    }
+  }
+
+  delete [] seen;
+
+  // characteristics of the frequency distributions
+
+  for (value = 0;value < nb_value;value++) {
+    first_occurrence[value]->nb_value_computation();
+    first_occurrence[value]->offset_computation();
+    first_occurrence[value]->nb_element_computation();
+    first_occurrence[value]->max_computation();
+    first_occurrence[value]->mean_computation();
+    first_occurrence[value]->variance_computation();
+  }
+
+  // copy into the characteristics (the copy constructor is applied once the support is known)
+
+  characteristics[variable]->first_occurrence = new FrequencyDistribution*[nb_value];
+  for (value = 0;value < nb_value;value++) {
+    characteristics[variable]->first_occurrence[value] = new FrequencyDistribution(*(first_occurrence[value]));
+    delete first_occurrence[value];
+  }
+  delete [] first_occurrence;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the frequency distributions of recurrence times in
+ *         each value of an integer variable.
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::build_recurrence_time_frequency_distribution(int variable)
+
+{
+  int value , seq , pos;
+  int *last_index;
+  const int nb_value = marginal_distribution[variable]->nb_value;
+  FrequencyDistribution **recurrence_time;
+
+
+  // working frequency distributions, allocated with the largest possible support
+
+  recurrence_time = new FrequencyDistribution*[nb_value];
+  for (value = 0;value < nb_value;value++) {
+    recurrence_time[value] = new FrequencyDistribution(max_length);
+  }
+
+  // last_index[value]: index of the previous occurrence of value in the current sequence
+  // (I_DEFAULT while the value has not been observed)
+
+  last_index = new int[nb_value];
+
+  for (seq = 0;seq < nb_sequence;seq++) {
+    for (value = 0;value < nb_value;value++) {
+      last_index[value] = I_DEFAULT;
+    }
+
+    for (pos = 0;pos < length[seq];pos++) {
+      const int current = int_sequence[seq][variable][pos];
+
+      if (last_index[current] != I_DEFAULT) {
+        (recurrence_time[current]->frequency[pos - last_index[current]])++;
+      }
+      last_index[current] = pos;
+    }
+  }
+
+  delete [] last_index;
+
+  // characteristics of the frequency distributions
+
+  for (value = 0;value < nb_value;value++) {
+    recurrence_time[value]->nb_value_computation();
+    recurrence_time[value]->offset_computation();
+    recurrence_time[value]->nb_element_computation();
+    recurrence_time[value]->max_computation();
+    recurrence_time[value]->mean_computation();
+    recurrence_time[value]->variance_computation();
+  }
+
+  // copy into the characteristics (the copy constructor is applied once the support is known)
+
+  characteristics[variable]->recurrence_time = new FrequencyDistribution*[nb_value];
+  for (value = 0;value < nb_value;value++) {
+    characteristics[variable]->recurrence_time[value] = new FrequencyDistribution(*(recurrence_time[value]));
+    delete recurrence_time[value];
+  }
+  delete [] recurrence_time;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the frequency distributions of sojourn times in
+ *         each value of an integer variable.
+ *
+ *  \param[in] variable         variable index,
+ *  \param[in] initial_run_flag flag on the construction of left-censored sojourn time frequency distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void MarkovianSequences::build_sojourn_time_frequency_distribution(int variable , int initial_run_flag)
+
+{
+  int value , seq , pos , last , family;
+  int run_length;
+  const int nb_value = marginal_distribution[variable]->nb_value;
+  FrequencyDistribution **sojourn_time = NULL , **initial_run = NULL , **final_run = NULL;
+
+
+  // working frequency distributions (initial runs only on request)
+
+  sojourn_time = new FrequencyDistribution*[nb_value];
+  final_run = new FrequencyDistribution*[nb_value];
+  if (initial_run_flag) {
+    initial_run = new FrequencyDistribution*[nb_value];
+  }
+
+  for (value = 0;value < nb_value;value++) {
+    sojourn_time[value] = new FrequencyDistribution(max_length + 1);
+    if (initial_run_flag) {
+      initial_run[value] = new FrequencyDistribution(max_length + 1);
+    }
+    final_run[value] = new FrequencyDistribution(max_length + 1);
+  }
+
+  // scan of the runs of each sequence
+
+  for (seq = 0;seq < nb_sequence;seq++) {
+    const int *sequence = int_sequence[seq][variable];
+
+    last = 0;    // index of the last element of the current run
+    run_length = 1;
+
+    for (pos = 1;pos < length[seq];pos++) {
+      if (sequence[pos] != sequence[last]) {
+
+        // a run whose length equals the current index started at the beginning of the sequence
+
+        if ((initial_run_flag) && (run_length == pos)) {
+          (initial_run[sequence[last]]->frequency[run_length])++;
+        }
+        else {
+          (sojourn_time[sequence[last]]->frequency[run_length])++;
+        }
+        run_length = 0;
+      }
+
+      run_length++;
+      last = pos;
+    }
+
+    // last run of the sequence (also an initial run when a single value is visited)
+
+    if ((initial_run_flag) && (run_length == length[seq])) {
+      (initial_run[sequence[last]]->frequency[run_length])++;
+    }
+    (final_run[sequence[last]]->frequency[run_length])++;
+  }
+
+  // characteristics of the frequency distributions, then copy into the characteristics;
+  // initial_run stays NULL (and is skipped) when initial_run_flag is not set
+
+  FrequencyDistribution **source[3] = {sojourn_time , initial_run , final_run};
+  FrequencyDistribution ***target[3] = {&(characteristics[variable]->sojourn_time) ,
+                                        &(characteristics[variable]->initial_run) ,
+                                        &(characteristics[variable]->final_run)};
+
+  for (family = 0;family < 3;family++) {
+    if (!source[family]) {
+      continue;
+    }
+
+    for (value = 0;value < nb_value;value++) {
+      source[family][value]->nb_value_computation();
+      source[family][value]->offset_computation();
+      source[family][value]->nb_element_computation();
+      source[family][value]->max_computation();
+      source[family][value]->mean_computation();
+      source[family][value]->variance_computation();
+    }
+  }
+
+  for (family = 0;family < 3;family++) {
+    if (!source[family]) {
+      continue;
+    }
+
+    *(target[family]) = new FrequencyDistribution*[nb_value];
+    for (value = 0;value < nb_value;value++) {
+      (*(target[family]))[value] = new FrequencyDistribution(*(source[family][value]));
+      delete source[family][value];
+    }
+    delete [] source[family];
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  
\brief Update of the frequency distributions of sojourn times in + * each value of an integer variable. + * + * \param[in] variable variable index. + */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::sojourn_time_frequency_distribution_computation(int variable) + +{ + int i , j; + int run_length , *pisequence; + + + // initialization of the frequency distributions + + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->sojourn_time[i]->offset = 1; + characteristics[variable]->sojourn_time[i]->nb_value = characteristics[variable]->sojourn_time[i]->alloc_nb_value; + + for (j = 0;j < characteristics[variable]->sojourn_time[i]->nb_value;j++) { + characteristics[variable]->sojourn_time[i]->frequency[j] = 0; + } + + if (characteristics[variable]->initial_run) { + characteristics[variable]->initial_run[i]->offset = 1; + characteristics[variable]->initial_run[i]->nb_value = characteristics[variable]->initial_run[i]->alloc_nb_value; + + for (j = 0;j < characteristics[variable]->initial_run[i]->nb_value;j++) { + characteristics[variable]->initial_run[i]->frequency[j] = 0; + } + } + + characteristics[variable]->final_run[i]->offset = 1; + characteristics[variable]->final_run[i]->nb_value = characteristics[variable]->final_run[i]->alloc_nb_value; + + for (j = 0;j < characteristics[variable]->final_run[i]->nb_value;j++) { + characteristics[variable]->final_run[i]->frequency[j] = 0; + } + } + + // update of the frequency distributions + + for (i = 0;i < nb_sequence;i++) { + pisequence = int_sequence[i][variable]; + run_length = 1; + for (j = 1;j < length[i];j++) { + if (*(pisequence + 1) != *pisequence) { + if ((characteristics[variable]->initial_run) && (run_length == j)) { + (characteristics[variable]->initial_run[*pisequence]->frequency[run_length])++; + } + else { + (characteristics[variable]->sojourn_time[*pisequence]->frequency[run_length])++; + } + run_length = 0; + } + + 
run_length++; + pisequence++; + } + + if ((characteristics[variable]->initial_run) && (run_length == length[i])) { + (characteristics[variable]->initial_run[*pisequence]->frequency[run_length])++; + } + (characteristics[variable]->final_run[*pisequence]->frequency[run_length])++; + } + + // computation of the frequency distribution characteristics + + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->sojourn_time[i]->nb_value_computation(); + characteristics[variable]->sojourn_time[i]->offset_computation(); + characteristics[variable]->sojourn_time[i]->nb_element_computation(); + characteristics[variable]->sojourn_time[i]->max_computation(); + characteristics[variable]->sojourn_time[i]->mean_computation(); + characteristics[variable]->sojourn_time[i]->variance_computation(); + } + + if (characteristics[variable]->initial_run) { + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->initial_run[i]->nb_value_computation(); + characteristics[variable]->initial_run[i]->offset_computation(); + characteristics[variable]->initial_run[i]->nb_element_computation(); + characteristics[variable]->initial_run[i]->max_computation(); + characteristics[variable]->initial_run[i]->mean_computation(); + characteristics[variable]->initial_run[i]->variance_computation(); + } + } + + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->final_run[i]->nb_value_computation(); + characteristics[variable]->final_run[i]->offset_computation(); + characteristics[variable]->final_run[i]->nb_element_computation(); + characteristics[variable]->final_run[i]->max_computation(); + characteristics[variable]->final_run[i]->mean_computation(); + characteristics[variable]->final_run[i]->variance_computation(); + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Update of the frequency distributions of censored sojourn times in + * each value 
of an integer variable. + * + * \param[in] initial_run pointer on the left-censored sojourn time frequency distributions, + * \param[in] final_run pointer on the right-censored sojourn time frequency distributions, + * \param[in] single_run pointer on the sequence length frequency distributions for the case of + * a single visited state. + */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::censored_sojourn_time_frequency_distribution_computation(FrequencyDistribution **initial_run , + FrequencyDistribution **final_run , + FrequencyDistribution **single_run) const + +{ + int i , j; + int *pisequence; + + + for (i = 0;i < nb_sequence;i++) { + pisequence = int_sequence[i][0]; + for (j = 1;j < length[i];j++) { + if (*(pisequence + 1) != *pisequence) { + (initial_run[*pisequence]->frequency[j])++; + break; + } + pisequence++; + } + + pisequence = int_sequence[i][0] + length[i] - 1; + if (j == length[i]) { + (single_run[*pisequence]->frequency[length[i]])++; + } + + else { + for (j = 1;j < length[i];j++) { + if (*(pisequence - 1) != *pisequence) { + (final_run[*pisequence]->frequency[j])++; + break; + } + pisequence--; + } + } + } + + // computation of the characteristics of the left- and right-censored sojourn time frequency distributions + // and single run frequency distributions + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + initial_run[i]->nb_value_computation(); + initial_run[i]->offset_computation(); + initial_run[i]->nb_element_computation(); + +# ifdef DEBUG + initial_run[i]->max_computation(); + initial_run[i]->mean_computation(); + initial_run[i]->variance_computation(); + + cout << initial_run[i]; +# endif + + final_run[i]->nb_value_computation(); + final_run[i]->offset_computation(); + final_run[i]->nb_element_computation(); + +# ifdef DEBUG + final_run[i]->max_computation(); + final_run[i]->mean_computation(); + final_run[i]->variance_computation(); + + cout << final_run[i]; +# endif + + 
single_run[i]->nb_value_computation(); + single_run[i]->offset_computation(); + single_run[i]->nb_element_computation(); + +# ifdef DEBUG + single_run[i]->max_computation(); + single_run[i]->mean_computation(); + single_run[i]->variance_computation(); + + cout << single_run[i]; +# endif + + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the frequency distributions of the number of runs + * per sequence of each value of an integer variable. + * + * \param[in] variable variable index. + */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::build_nb_run_frequency_distribution(int variable) + +{ + int i , j; + int *pisequence , *count; + FrequencyDistribution **nb_run; + + + // construction of the frequency distributions + + nb_run = new FrequencyDistribution*[marginal_distribution[variable]->nb_value]; + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + nb_run[i] = new FrequencyDistribution((max_length % 2 == 0 ? + max_length / 2 : max_length / 2 + 1) + 1); + } + +/* characteristics[variable]->nb_run = new FrequencyDistribution*[marginal_distribution[variable]->nb_value]; + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->nb_run[i] = new FrequencyDistribution((max_length % 2 == 0 ? 
+ max_length / 2 : max_length / 2 + 1) + 1); + } */ + + // update of the frequency distributions + + count = new int[marginal_distribution[variable]->nb_value]; + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < marginal_distribution[variable]->nb_value;j++) { + count[j] = 0; + } + + pisequence = int_sequence[i][variable]; + count[*pisequence++]++; + for (j = 1;j < length[i];j++) { + if (*pisequence != *(pisequence - 1)) { + count[*pisequence]++; + } + pisequence++; + } + + for (j = 0;j < marginal_distribution[variable]->nb_value;j++) { + (nb_run[j]->frequency[count[j]])++; +// (characteristics[variable]->nb_run[j]->frequency[count[j]])++; + } + } + + delete [] count; + + // computation of the frequency distribution characteristics + + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + nb_run[i]->nb_value_computation(); + nb_run[i]->offset_computation(); + nb_run[i]->nb_element = nb_sequence; + nb_run[i]->max_computation(); + nb_run[i]->mean_computation(); + nb_run[i]->variance_computation(); + } + +/* for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->nb_run[i]->nb_value_computation(); + characteristics[variable]->nb_run[i]->offset_computation(); + characteristics[variable]->nb_run[i]->nb_element = nb_sequence; + characteristics[variable]->nb_run[i]->max_computation(); + characteristics[variable]->nb_run[i]->mean_computation(); + characteristics[variable]->nb_run[i]->variance_computation(); + } */ + + characteristics[variable]->nb_run = new FrequencyDistribution*[marginal_distribution[variable]->nb_value]; + for (i = 0;i < marginal_distribution[variable]->nb_value;i++) { + characteristics[variable]->nb_run[i] = new FrequencyDistribution(*(nb_run[i])); + delete nb_run[i]; + } + delete [] nb_run; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the frequency distributions of the number of occurrences + * per sequence of each value of an integer 
variable.
 *
 *  The distributions are first accumulated in temporary objects allocated for
 *  max_length + 1 values, then copied into characteristics[variable]->nb_occurrence.
 *
 *  \param[in] variable variable index.
 */
/*--------------------------------------------------------------*/

void MarkovianSequences::build_nb_occurrence_frequency_distribution(int variable)

{
  int seq , pos , value;
  const int nb_value = marginal_distribution[variable]->nb_value;
  const int *state;
  int *count;
  FrequencyDistribution **nb_occurrence;


  // construction of the frequency distributions:
  // a value occurs at most max_length times in a sequence

  nb_occurrence = new FrequencyDistribution*[nb_value];
  for (value = 0;value < nb_value;value++) {
    nb_occurrence[value] = new FrequencyDistribution(max_length + 1);
  }

  // update of the frequency distributions

  count = new int[nb_value];

  for (seq = 0;seq < nb_sequence;seq++) {
    for (value = 0;value < nb_value;value++) {
      count[value] = 0;
    }

    state = int_sequence[seq][variable];
    for (pos = 0;pos < length[seq];pos++) {
      count[state[pos]]++;
    }

    for (value = 0;value < nb_value;value++) {
      (nb_occurrence[value]->frequency[count[value]])++;
    }
  }

  delete [] count;

  // computation of the frequency distribution characteristics and
  // transfer to the sequence characteristics

  characteristics[variable]->nb_occurrence = new FrequencyDistribution*[nb_value];

  for (value = 0;value < nb_value;value++) {
    nb_occurrence[value]->nb_value_computation();
    nb_occurrence[value]->offset_computation();
    nb_occurrence[value]->nb_element = nb_sequence;
    nb_occurrence[value]->max_computation();
    nb_occurrence[value]->mean_computation();
    nb_occurrence[value]->variance_computation();

    characteristics[variable]->nb_occurrence[value] = new FrequencyDistribution(*(nb_occurrence[value]));
    delete nb_occurrence[value];
  }

  delete [] nb_occurrence;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Extraction of the characteristics of sequences for categorical variables.
 *         Computation of the number of values, construction of the marginal frequency distribution,
 *         of the probabilities of each value as a function of the index parameter,
 *         construction of the frequency distributions of the times to the 1st occurrence of each value,
 *         of the recurrence times in each value, of the sojourn times in each value,
 *         of the number of runs of each value per sequence,
 *         of the number of occurrences of each value per sequence.
 *
 *  \param[in] variable          variable index,
 *  \param[in] sojourn_time_flag flag on the construction of sojourn time frequency distributions,
 *  \param[in] initial_run_flag  flag on the construction of left-censored sojourn time frequency distributions.
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::build_characteristic(int variable , bool sojourn_time_flag , + bool initial_run_flag) + +{ + int i , j; + bool build; + + + for (i = 0;i < nb_variable;i++) { + if (((variable == I_DEFAULT) || (i == variable)) && (marginal_distribution[i])) { + build = true; + + if (marginal_distribution[i]->nb_value > NB_OUTPUT) { + build = false; + } + + else if (type[i] != STATE) { + for (j = 0;j < marginal_distribution[i]->nb_value;j++) { + if (marginal_distribution[i]->frequency[j] == 0) { + build = false; + break; + } + } + } + + if (build) { + if (sojourn_time_flag) { + characteristics[i] = new SequenceCharacteristics(marginal_distribution[i]->nb_value); + } + + build_index_value(i); + if (index_parameter) { + build_explicit_index_value(i); + } + + build_first_occurrence_frequency_distribution(i); + build_recurrence_time_frequency_distribution(i); + +/* if (sojourn_time_flag) { + characteristics[i]->create_sojourn_time_frequency_distribution(max_length , initial_run_flag); + } + sojourn_time_frequency_distribution_computation(i); */ + + if (sojourn_time_flag) { + build_sojourn_time_frequency_distribution(i , initial_run_flag); + } + else { + sojourn_time_frequency_distribution_computation(i); + } + + if (max_length <= COUNTING_FREQUENCY_MAX_LENGTH) { + build_nb_run_frequency_distribution(i); + build_nb_occurrence_frequency_distribution(i); + } + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Count of words of fixed length. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying word counts, + * \param[in] variable variable index, + * \param[in] word_length word length, + * \param[in] begin_state begin state, + * \param[in] end_state end state, + * \param[in] min_frequency minimum frequency. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::word_count(StatError &error , ostream *os , int variable , + int word_length , int begin_state , int end_state , + int min_frequency) const + +{ + bool status = true , *selected_word; + int i , j , k; + int nb_state , nb_word , max_nb_word , value , max_frequency , total_frequency , width , + *power , *frequency , *word_value , *pisequence , *index , **word; + double nb_word_bound , *probability; + ios_base::fmtflags format_flags; + + + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + nb_state = marginal_distribution[variable]->nb_value - marginal_distribution[variable]->offset; + + if ((nb_state < 2) || (nb_state > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + + else { + max_nb_word = 0; + for (i = MAX(length_distribution->offset , word_length);i < length_distribution->nb_value;i++) { + max_nb_word += length_distribution->frequency[i] * (i - (word_length - 1)); + } + nb_word_bound = pow((double)nb_state , word_length); + if (nb_word_bound < max_nb_word) { + max_nb_word = (int)nb_word_bound; + } + + if (max_nb_word > MAX_NB_WORD) { + status = false; + error.update(SEQ_error[SEQR_MAX_NB_WORD]); + } + + if ((begin_state != I_DEFAULT) && ((begin_state < marginal_distribution[variable]->offset) || + (begin_state >= marginal_distribution[variable]->nb_value) || + (marginal_distribution[variable]->frequency[begin_state] == 0))) { + status = false; + ostringstream error_message; + error_message << 
STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_label[STATL_STATE] << " " << begin_state << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + if ((end_state != I_DEFAULT) && ((end_state < marginal_distribution[variable]->offset) || + (end_state >= marginal_distribution[variable]->nb_value) || + (marginal_distribution[variable]->frequency[end_state] == 0))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_label[STATL_STATE] << " " << end_state << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (min_frequency < 1) { + status = false; + error.update(SEQ_error[SEQR_MIN_FREQUENCY]); + } + + if (status) { + power = new int[word_length]; + + i = 1; + for (j = 0;j < word_length - 1;j++) { + power[j] = i; + i *= marginal_distribution[variable]->nb_value; + } + power[word_length - 1] = i; + + frequency = new int[max_nb_word]; + word_value = new int[max_nb_word]; + word = new int*[max_nb_word]; + + nb_word = 0; + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i] - (word_length - 1);j++) { + if (((begin_state == I_DEFAULT) || (int_sequence[i][variable][j] == begin_state)) && + ((end_state == I_DEFAULT) || (int_sequence[i][variable][j + word_length - 1] == end_state))) { + + // computation of the word score + + pisequence = int_sequence[i][variable] + j; + value = 0; + for (k = 0;k < word_length;k++) { + value += *pisequence++ * power[k]; + } + + // word search + + for (k = 0;k < nb_word;k++) { + if (value == word_value[k]) { + frequency[k]++; + break; + } + } + + // word construction + + if (k == nb_word) { + frequency[nb_word] = 1; + word_value[nb_word] = value; + word[nb_word] = new int[word_length]; + + pisequence = int_sequence[i][variable] + j; + for (k = 0;k < word_length;k++) { + word[nb_word][k] = *pisequence++; + } + nb_word++; + } + } + } + } + 
+ // sort of words by decreasing frequencies + + index = new int[nb_word]; + selected_word = new bool[nb_word]; + probability = new double[nb_word]; + + total_frequency = 0; + for (i = 0;i < nb_word;i++) { + total_frequency += frequency[i]; + selected_word[i] = false; + } + + for (i = 0;i < nb_word;i++) { + max_frequency = 0; + for (j = 0;j < nb_word;j++) { + if ((!selected_word[j]) && (frequency[j] > max_frequency)) { + max_frequency = frequency[j]; + index[i] = j; + } + } + if (frequency[index[i]] < min_frequency) { + break; + } + + selected_word[index[i]] = true; + probability[index[i]] = (double)frequency[index[i]] / (double)total_frequency; + + if (i == 0) { + width = column_width(max_frequency); + } + } + + // display of word counts + + if (os) { + format_flags = os->setf(ios::right , ios::adjustfield); + + for (j = 0;j < i;j++) { + for (k = 0;k < word_length;k++) { + *os << word[index[j]][k] << " "; + } + *os << " " << setw(width) << frequency[index[j]] + << " " << probability[index[j]]; + + if (j == 0) { + *os << " (" << nb_word << " " << SEQ_label[SEQL_WORDS] << ", " + << total_frequency << ")"; + } + *os << endl; + } + + os->setf(format_flags , ios::adjustfield); + } + + delete [] power; + delete [] frequency; + delete [] word_value; + delete [] index; + delete [] selected_word; + delete [] probability; + + for (i = 0;i < nb_word;i++) { + delete [] word[i]; + } + delete [] word; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of a the autocorrelation function for a given state and + * a variable for the assessment of semi-Markov switching autoregressive models. + * + * \param[in] correl reference on a Correlation object, + * \param[in] state state, + * \param[in] variable variable index. 
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::autocorrelation_computation(Correlation &correl , int state , int variable) const + +{ + if (marginal_distribution[0]->frequency[state] >= AUTOCORRELATION_MIN_FREQUENCY) { + int i , j , k; + int max_lag; + double mean , variance , diff; + + + // computation of mean and variance + + mean = 0.; + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if (int_sequence[i][0][j] == state) { + mean += int_sequence[i][variable][j]; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if (int_sequence[i][0][j] == state) { + mean += real_sequence[i][variable][j]; + } + } + } + break; + } + } + + mean /= marginal_distribution[0]->frequency[state]; + + variance = 0.; + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if (int_sequence[i][0][j] == state) { + diff = int_sequence[i][variable][j] - mean; + variance += diff * diff; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if (int_sequence[i][0][j] == state) { + diff = real_sequence[i][variable][j] - mean; + variance += diff * diff; + } + } + } + break; + } + } + + variance /= marginal_distribution[0]->frequency[state]; + + // computation of the autocorrelation coefficients + + correl.point[0][0] = 1.; + correl.frequency[0] = marginal_distribution[0]->frequency[state]; + + for (i = 1;i < correl.length;i++) { + correl.point[0][i] = 0.; + correl.frequency[i] = 0; + + switch (type[variable]) { + + case INT_VALUE : { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < i;k++) { + if (int_sequence[j][0][k] == state) { + if ((k == 0) || (int_sequence[j][0][k - 1] != state)) { + max_lag = 0; + } + else { + max_lag++; + } + } + } + + for (k = i;k < 
length[j];k++) { + if (int_sequence[j][0][k] == state) { + if (int_sequence[j][0][k - 1] != state) { + max_lag = 0; + } + else { + max_lag++; + } + + if (i <= max_lag) { + correl.point[0][i] += (int_sequence[j][variable][k] - mean) * (int_sequence[j][variable][k - i] - mean); + correl.frequency[i]++; + } + } + } + } + break; + } + + case REAL_VALUE : { + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < i;k++) { + if (int_sequence[j][0][k] == state) { + if ((k == 0) || (int_sequence[j][0][k - 1] != state)) { + max_lag = 0; + } + else { + max_lag++; + } + } + } + + for (k = i;k < length[j];k++) { + if (int_sequence[j][0][k] == state) { + if (int_sequence[j][0][k - 1] != state) { + max_lag = 0; + } + else { + max_lag++; + } + + if (i <= max_lag) { + correl.point[0][i] += (real_sequence[j][variable][k] - mean) * (real_sequence[j][variable][k - i] - mean); + correl.frequency[i]++; + } + } + } + } + break; + } + } + + if ((correl.frequency[i] >= marginal_distribution[0]->frequency[state] * AUTOCORRELATION_FREQUENCY_RATIO) && + (correl.frequency[i] >= AUTOCORRELATION_MIN_FREQUENCY)) { + correl.point[0][i] /= correl.frequency[i] * variance; + } + else { + correl.length = i; + break; + } + } + } + + else { + correl.length = 0; + } +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/markovian_sequences2.cpp b/src/cpp/sequence_analysis/markovian_sequences2.cpp new file mode 100644 index 0000000..25b90da --- /dev/null +++ b/src/cpp/sequence_analysis/markovian_sequences2.cpp @@ -0,0 +1,3122 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General 
Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include +#include +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "stat_tool/quantile_computation.hpp" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a MarkovianSequences object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level, + * \param[in] comment_flag flag file. 
 */
/*--------------------------------------------------------------*/

ostream& MarkovianSequences::ascii_write(ostream &os , bool exhaustive , bool comment_flag) const

{
  int i , j , k;
  int *int_value , *pint_value;
  double mean , variance , median , lower_quartile , upper_quartile , *real_value , *preal_value;


  // index parameter of type TIME: range, frequency distribution of the index parameter
  // and, when available, of the time intervals
  // (each descriptive line is prefixed by "# " when comment_flag is set)

  if (index_param_type == TIME) {
    os << SEQ_word[SEQW_INDEX_PARAMETER] << " : "
       << SEQ_index_parameter_word[index_param_type] << " ";
    if (comment_flag) {
      os << "# ";
    }
    os << "(" << SEQ_label[SEQL_MIN_INDEX_PARAMETER] << ": " << index_parameter_distribution->offset << ", "
       << SEQ_label[SEQL_MAX_INDEX_PARAMETER] << ": " << index_parameter_distribution->nb_value - 1 << ")" << endl;

    os << "\n";
    if (comment_flag) {
      os << "# ";
    }
    os << SEQ_label[SEQL_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - ";
    index_parameter_distribution->ascii_characteristic_print(os , false , comment_flag);

    if (exhaustive) {
      os << "\n";
      if (comment_flag) {
        os << "# ";
      }
      os << " | " << SEQ_label[SEQL_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
      index_parameter_distribution->ascii_print(os , comment_flag);
    }

    if (index_interval) {
      os << "\n";
      if (comment_flag) {
        os << "# ";
      }
      os << SEQ_label[SEQL_TIME_INTERVAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - ";
      index_interval->ascii_characteristic_print(os , false , comment_flag);

      if (exhaustive) {
        os << "\n";
        if (comment_flag) {
          os << "# ";
        }
        os << " | " << SEQ_label[SEQL_TIME_INTERVAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
        index_interval->ascii_print(os , comment_flag);
      }
    }

    os << "\n";
  }

  os << nb_variable << " " << STAT_word[nb_variable == 1 ? STATW_VARIABLE : STATW_VARIABLES] << endl;

  // observed self-transition curves, one per value of variable 0 (exhaustive output only)

  if ((self_transition) && (exhaustive)) {
    for (i = 0;i < marginal_distribution[0]->nb_value;i++) {
      if (self_transition[i]) {
        os << "\n";
        if (comment_flag) {
          os << "# ";
        }
        os << " | " << STAT_label[STATL_STATE] << " " << i << " - "
           << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION] << endl;

        self_transition[i]->ascii_print(os , comment_flag);
      }
    }
  }

  // description of each variable

  for (i = 0;i < nb_variable;i++) {
    os << "\n" << STAT_word[STATW_VARIABLE] << " " << i + 1 << " : "
       << STAT_variable_word[type[i]];

    if (type[i] != AUXILIARY) {
      os << " ";
      if (comment_flag) {
        os << "# ";
      }

      // number of states for a state variable, range of values otherwise

      if (type[i] == STATE) {
        os << "(" << marginal_distribution[i]->nb_value << " "
           << STAT_label[marginal_distribution[i]->nb_value == 1 ? STATL_STATE : STATL_STATES] << ")" << endl;
      }
      else {
        os << "(" << STAT_label[STATL_MIN_VALUE] << ": " << min_value[i] << ", "
           << STAT_label[STATL_MAX_VALUE] << ": " << max_value[i] << ")" << endl;
      }

      // a marginal frequency distribution is available: it is summarized and, if small
      // enough or in exhaustive mode, printed

      if (marginal_distribution[i]) {
        os << "\n";
        if (comment_flag) {
          os << "# ";
        }
        os << STAT_label[type[i] == STATE ? STATL_STATE : STATL_MARGINAL] << " "
           << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - ";
        marginal_distribution[i]->ascii_characteristic_print(os , false , comment_flag);

        if ((marginal_distribution[i]->nb_value <= ASCII_NB_VALUE) || (exhaustive)) {
          os << "\n";
          if (comment_flag) {
            os << "# ";
          }
          os << " | " << STAT_label[STATL_FREQUENCY] << endl;
          marginal_distribution[i]->ascii_print(os , comment_flag);
        }

        // debug only: comparison with hard-coded von Mises and Gaussian distributions;
        // a temporary marginal histogram is built and released within this section

#       ifdef DEBUG
        ContinuousParametric *dist;
;
        marginal_histogram[i] = new Histogram(*marginal_distribution[i]);

        dist = new ContinuousParametric(VON_MISES , 137.5 , 180 * 180 / (50. * 50. * M_PI * M_PI) , DEGREE);
        os << "\n";
        dist->ascii_parameter_print(os);
        dist->ascii_print(os , comment_flag , true , NULL , marginal_distribution[i]);
        os << "\n";
        dist->ascii_print(os , comment_flag , true , marginal_histogram[i]);
        delete dist;

        dist = new ContinuousParametric(GAUSSIAN , 137.5 , 50.);
        os << "\n";
        dist->ascii_parameter_print(os);
        dist->ascii_print(os , comment_flag , true , NULL , marginal_distribution[i]);
//        os << "\n";
//        dist->ascii_print(os , comment_flag , true , marginal_histogram[i]);
        delete dist;

        delete marginal_histogram[i];
        marginal_histogram[i] = NULL;
#       endif

      }

      // no marginal frequency distribution: summary statistics are computed from the raw values

      else {
        mean = mean_computation(i);
        variance = variance_computation(i , mean);

        if (variance > 0.) {

          // all the values of the variable are copied into a flat array of size cumul_length
          // to compute the quartiles (only INT_VALUE and REAL_VALUE are handled)

          switch (type[i]) {

          case INT_VALUE : {
            int_value = new int[cumul_length];
            pint_value = int_value;
            for (j = 0;j < nb_sequence;j++) {
              for (k = 0;k < length[j];k++) {
                *pint_value++ = int_sequence[j][i][k];
              }
            }

            lower_quartile = quantile_computation(cumul_length , int_value , 0.25);
            median = quantile_computation(cumul_length , int_value , 0.5);
            upper_quartile = quantile_computation(cumul_length , int_value , 0.75);

            delete [] int_value;
            break;
          }

          case REAL_VALUE : {
            real_value = new double[cumul_length];
            preal_value = real_value;
            for (j = 0;j < nb_sequence;j++) {
              for (k = 0;k < length[j];k++) {
                *preal_value++ = real_sequence[j][i][k];
              }
            }

            lower_quartile = quantile_computation(cumul_length , real_value , 0.25);
            median = quantile_computation(cumul_length , real_value , 0.5);
            upper_quartile = quantile_computation(cumul_length , real_value , 0.75);

            delete [] real_value;
            break;
          }
          }
        }

        // constant variable: the median is the mean and the quartiles are not computed

        else {
          median = mean;
        }

        os << "\n";
        if (comment_flag) {
          os << "# ";
        }
        os << STAT_label[STATL_SAMPLE_SIZE] << ": " << cumul_length << endl;

        if (comment_flag) {
          os << "# ";
        }
        os << STAT_label[STATL_MEAN] << ": " << mean << " "
           << STAT_label[STATL_MEDIAN] << ": " << median << endl;

        if (comment_flag) {
          os << "# ";
        }
        os << STAT_label[STATL_VARIANCE] << ": " << variance << " "
           << STAT_label[STATL_STANDARD_DEVIATION] << ": " << sqrt(variance);
        // the quartiles are only defined (and assigned above) when variance > 0
        if (variance > 0.) {
          os << " " << STAT_label[STATL_LOWER_QUARTILE] << ": " << lower_quartile
             << " " << STAT_label[STATL_UPPER_QUARTILE] << ": " << upper_quartile;
        }
        os << endl;

        if ((variance > 0.) && (exhaustive)) {
          if (comment_flag) {
            os << "# ";
          }
          os << STAT_label[STATL_SKEWNESS_COEFF] << ": " << skewness_computation(i , mean , variance) << " "
             << STAT_label[STATL_KURTOSIS_COEFF] << ": " << kurtosis_computation(i , mean , variance) << endl;
        }

        if ((marginal_histogram[i]) && (exhaustive)) {
          os << "\n";
          if (comment_flag) {
            os << "# ";
          }
          os << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM] << endl;

          os << "\n";
          if (comment_flag) {
            os << "# ";
          }
          os << " " << STAT_label[STATL_VALUE] << " | " << STAT_label[STATL_FREQUENCY] << endl;
          marginal_histogram[i]->ascii_print(os , comment_flag);
        }
      }

      // characteristics of the variable, when they have been built

      if (characteristics[i]) {
        characteristics[i]->ascii_print(os , type[i] , *length_distribution , exhaustive , comment_flag);
      }
    }

    // auxiliary variable: nothing is written unless MESSAGE is defined

    else {

#     ifdef MESSAGE
      mean = mean_computation(i);
      variance = variance_computation(i , mean);

      os << "\n";
      if (comment_flag) {
        os << "# ";
      }
      os << STAT_label[STATL_MEAN] << ": " << mean << " "
         << STAT_label[STATL_VARIANCE] << ": " << variance << " "
         << STAT_label[STATL_STANDARD_DEVIATION] << ": " << sqrt(variance) << endl;
#     endif

//      os << endl;
    }
  }

  // sequence length frequency distribution and cumulative length

  os << "\n";
  if (comment_flag) {
    os << "# ";
  }
  os << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - ";
  length_distribution->ascii_characteristic_print(os , false , comment_flag);

  if (exhaustive) {
    os << "\n";
    if (comment_flag) {
      os << "# ";
    }
    os << " | " << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
    length_distribution->ascii_print(os , comment_flag);
  }

  os << "\n";
  if (comment_flag) {
    os << "# ";
  }
  os << SEQ_label[SEQL_CUMUL_LENGTH] << ": " << cumul_length << endl;

  return os;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Writing of a MarkovianSequences object.
 *
 *  Forwards to the three-argument overload with comment_flag set to false.
 *
 *  \param[in,out] os         stream,
 *  \param[in]     exhaustive flag detail level.
 */
/*--------------------------------------------------------------*/

ostream& MarkovianSequences::ascii_write(ostream &os , bool exhaustive) const

{
  return ascii_write(os , exhaustive , false);
}


/*--------------------------------------------------------------*/
/**
 *  \brief Writing of a MarkovianSequences object in a file.
 *
 *  \param[in] error      reference on a StatError object,
 *  \param[in] path       file path,
 *  \param[in] exhaustive flag detail level.
 *
 *  \return               error status.
 */
/*--------------------------------------------------------------*/

bool MarkovianSequences::ascii_write(StatError &error , const string path ,
                                     bool exhaustive) const

{
  bool status;
  ofstream out_file(path.c_str());


  error.init();

  // the file could not be opened: reported through the StatError object

  if (!out_file) {
    status = false;
    error.update(STAT_error[STATR_FILE_NAME]);
  }

  else {
    status = true;
    ascii_write(out_file , exhaustive , false);
  }

  return status;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Writing of a MarkovianSequences object.
 *
 *  \param[in,out] os         stream,
 *  \param[in]     format     format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY),
 *  \param[in]     exhaustive flag detail level.
+ */ +/*--------------------------------------------------------------*/ + +ostream& MarkovianSequences::ascii_data_write(ostream &os , output_sequence_format format , + bool exhaustive) const + +{ + ascii_write(os , exhaustive , false); + ascii_print(os , format , false); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a MarkovianSequences object. + * + * \param[in] format format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY), + * \param[in] exhaustive flag detail level, + * + * \return string. + */ +/*--------------------------------------------------------------*/ + +string MarkovianSequences::ascii_data_write(output_sequence_format format , bool exhaustive) const + +{ + ostringstream oss; + + + ascii_data_write(oss , format , exhaustive); + + return oss.str(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a MarkovianSequences object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] format format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY), + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::ascii_data_write(StatError &error , const string path , + output_sequence_format format , bool exhaustive) const + +{ + bool status = false; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + if (format != 'a') { + ascii_write(out_file , exhaustive , true); + } + ascii_print(out_file , format , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a MarkovianSequences object in a file at the spreadsheet format. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::spreadsheet_write(StatError &error , const string path) const + +{ + bool status; + int i , j , k; + int *int_value , *pint_value; + double mean , variance , median , lower_quartile , upper_quartile , *real_value , *preal_value; + Curves *smoothed_curves; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + if (index_param_type == TIME) { + out_file << SEQ_word[SEQW_INDEX_PARAMETER] << "\t" + << SEQ_index_parameter_word[index_param_type] << "\t\t" + << SEQ_label[SEQL_MIN_INDEX_PARAMETER] << "\t" << index_parameter_distribution->offset << "\t\t" + << SEQ_label[SEQL_MAX_INDEX_PARAMETER] << "\t" << index_parameter_distribution->nb_value - 1 << endl; + + out_file << "\n" << SEQ_label[SEQL_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + index_parameter_distribution->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << SEQ_label[SEQL_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + index_parameter_distribution->spreadsheet_print(out_file); + out_file << endl; + } + + out_file << nb_variable << "\t" << STAT_word[nb_variable == 1 ? 
STATW_VARIABLE : STATW_VARIABLES] << endl; + + if (self_transition) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (self_transition[i]) { + out_file << "\n\t" << STAT_label[STATL_STATE] << " " << i << " - " + << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION] << endl; + self_transition[i]->spreadsheet_print(out_file); + + smoothed_curves = new Curves(*(self_transition[i]) , SMOOTHING); + + out_file << "\n\t" << STAT_label[STATL_STATE] << " " << i << " - " + << SEQ_label[SEQL_SMOOTHED] << " " << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION] << endl; + smoothed_curves->spreadsheet_print(out_file); + + delete smoothed_curves; + } + } + } + + for (i = 0;i < nb_variable;i++) { + out_file << "\n" << STAT_word[STATW_VARIABLE] << "\t" << i + 1 << "\t" + << STAT_variable_word[type[i]]; + + if (type[i] != AUXILIARY) { + if (type[i] == STATE) { + out_file << "\t\t" << marginal_distribution[i]->nb_value << "\t" + << STAT_label[marginal_distribution[i]->nb_value == 1 ? STATL_STATE : STATL_STATES] << endl; + } + else { + out_file << "\t\t" << STAT_label[STATL_MIN_VALUE] << "\t" << min_value[i] + << "\t\t" << STAT_label[STATL_MAX_VALUE] << "\t" << max_value[i] << endl; + } + + if (marginal_distribution[i]) { + out_file << "\n" << STAT_label[type[i] == STATE ? STATL_STATE : STATL_MARGINAL] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + marginal_distribution[i]->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << STAT_label[STATL_FREQUENCY] << endl; + marginal_distribution[i]->spreadsheet_print(out_file); + } + + else { + mean = mean_computation(i); + variance = variance_computation(i , mean); + + if (variance > 0.) 
{ + switch (type[i]) { + + case INT_VALUE : { + int_value = new int[cumul_length]; + pint_value = int_value; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + *pint_value++ = int_sequence[j][i][k]; + } + } + + lower_quartile = quantile_computation(cumul_length , int_value , 0.25); + median = quantile_computation(cumul_length , int_value , 0.5); + upper_quartile = quantile_computation(cumul_length , int_value , 0.75); + + delete [] int_value; + break; + } + + case REAL_VALUE : { + real_value = new double[cumul_length]; + preal_value = real_value; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + *preal_value++ = real_sequence[j][i][k]; + } + } + + lower_quartile = quantile_computation(cumul_length , real_value , 0.25); + median = quantile_computation(cumul_length , real_value , 0.5); + upper_quartile = quantile_computation(cumul_length , real_value , 0.75); + + delete [] real_value; + break; + } + } + } + + else { + median = mean; + } + + out_file << "\n" << STAT_label[STATL_SAMPLE_SIZE] << "\t" << cumul_length << endl; + + out_file << STAT_label[STATL_MEAN] << "\t" << mean << "\t\t" + << STAT_label[STATL_MEDIAN] << "\t" << median << endl; + + out_file << STAT_label[STATL_VARIANCE] << "\t" << variance << "\t\t" + << STAT_label[STATL_STANDARD_DEVIATION] << "\t" << sqrt(variance); + if (variance > 0.) { + out_file << "\t\t" << STAT_label[STATL_LOWER_QUARTILE] << "\t" << lower_quartile + << "\t\t" << STAT_label[STATL_UPPER_QUARTILE] << "\t" << upper_quartile; + } + out_file << endl; + + if (variance > 0.) 
{ + out_file << STAT_label[STATL_SKEWNESS_COEFF] << "\t" << skewness_computation(i , mean , variance) << "\t\t" + << STAT_label[STATL_KURTOSIS_COEFF] << "\t" << kurtosis_computation(i , mean , variance) << endl; + } + + if (marginal_histogram[i]) { + out_file << "\n" << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM] << endl; + out_file << "\n" << STAT_label[STATL_VALUE] << "\t" << STAT_label[STATL_FREQUENCY] << endl; + marginal_histogram[i]->spreadsheet_print(out_file); + } + } + + if (characteristics[i]) { + characteristics[i]->spreadsheet_print(out_file , type[i] , *length_distribution); + } + } + + else { + out_file << endl; + } + } + + out_file << "\n" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + length_distribution->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + length_distribution->spreadsheet_print(out_file); + + out_file << "\n" << SEQ_label[SEQL_CUMUL_LENGTH] << "\t" << cumul_length << endl; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a MarkovianSequences object using Gnuplot for a variable + * in the case of the absence of the characteristic distributions. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] variable variable index, + * \param[in] nb_variable number of variables. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::plot_print(const char *prefix , const char *title , int variable , + int nb_variable) const + +{ + bool status; + int i; + int nb_histo; + const FrequencyDistribution *phisto[1]; + ostringstream data_file_name[2]; + + + // writing of the data files + + data_file_name[0] << prefix << variable + 1 << 0 << ".dat"; + + nb_histo = 0; + if (index_parameter_distribution) { + phisto[nb_histo++] = index_parameter_distribution; + } + + status = length_distribution->plot_print((data_file_name[0].str()).c_str() , nb_histo , phisto); + + if (status) { + if (marginal_distribution[variable]) { + data_file_name[1] << prefix << variable + 1 << 1 << ".dat"; + marginal_distribution[variable]->plot_print((data_file_name[1].str()).c_str()); + } + else if (marginal_histogram[variable]) { + data_file_name[1] << prefix << variable + 1 << 1 << ".dat"; + marginal_histogram[variable]->plot_print((data_file_name[1].str()).c_str()); + } + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + + case 0 : { + if (nb_variable == 1) { + file_name[0] << prefix << ".plot"; + } + else { + file_name[0] << prefix << variable + 1 << ".plot"; + } + break; + } + + case 1 : { + if (nb_variable == 1) { + file_name[0] << prefix << ".print"; + } + else { + file_name[0] << prefix << variable + 1 << ".print"; + } + break; + } + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + + if (nb_variable == 1) { + file_name[1] << label(prefix) << ".ps"; + } + else { + file_name[1] << label(prefix) << variable + 1 << ".ps"; + } + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + if 
(marginal_distribution[variable]) { + if (marginal_distribution[variable]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(marginal_distribution[variable]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << MAX(marginal_distribution[variable]->nb_value - 1 , 1) << "] [0:" + << (int)(marginal_distribution[variable]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using 1 title \"" + << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - " + << STAT_label[type[variable] == STATE ? STATL_STATE : STATL_MARGINAL] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (marginal_distribution[variable]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(marginal_distribution[variable]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + else if (marginal_histogram[variable]) { + if ((int)(marginal_histogram[variable]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [" << marginal_histogram[variable]->min_value - marginal_histogram[variable]->bin_width << ":" + << marginal_histogram[variable]->max_value + marginal_histogram[variable]->bin_width << "] [0:" + << (int)(marginal_histogram[variable]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using 1:2 title \"" + << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " " + << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM] + << "\" with histeps" << endl; + + if ((int)(marginal_histogram[variable]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << 
STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_distribution->nb_value - 1 << "] [0:" + << (int)(length_distribution->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using 1 title \"" + << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (index_parameter_distribution) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (index_parameter_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(index_parameter_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [" << index_parameter_distribution->offset << ":" + << index_parameter_distribution->nb_value - 1 << "] [0:" + << (int)(index_parameter_distribution->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using 2 title \"" + << SEQ_label[SEQL_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (index_parameter_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(index_parameter_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" 
<< endl; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a MarkovianSequences object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status , start; + int i , j; + int max_frequency[NB_OUTPUT]; + ostringstream data_file_name[NB_OUTPUT]; + + + error.init(); + + if (characteristics[0]) { + status = characteristics[0]->plot_print(prefix , title , 0 , nb_variable , type[0] , *length_distribution); + } + else { + status = plot_print(prefix , title , 0 , nb_variable); + } + + if (status) { + for (i = 1;i < nb_variable;i++) { + if (characteristics[i]) { + characteristics[i]->plot_print(prefix , title , i , nb_variable , type[i] , *length_distribution); + } + else { + plot_print(prefix , title , i , nb_variable); + } + } + + if (self_transition) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (self_transition[i]) { + max_frequency[i] = self_transition[i]->max_frequency_computation(); + + data_file_name[i] << prefix << i << ".dat"; + self_transition[i]->plot_print_standard_residual((data_file_name[i].str()).c_str()); + } + } + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << 1 << 0 << ".plot"; + break; + case 1 : + file_name[0] << prefix << 1 << 0 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << 1 << 0 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << 
"set xtics nomirror\n" + << "set title"; + if ((title) || (nb_variable > 1)) { + out_file << " \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << 1; + } + out_file << "\""; + } + out_file << "\n\n"; + + start = true; + for (j = 0;j < marginal_distribution[0]->nb_value;j++) { + if (self_transition[j]) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << self_transition[j]->length - 1 << "] [0:1] \"" + << label((data_file_name[j].str()).c_str()) << "\" using 1:2 title \"" + << STAT_label[STATL_STATE] << " " << j << " - " << SEQ_label[SEQL_OBSERVED] << " " + << SEQ_label[SEQL_SELF_TRANSITION] << "\" with points" << endl; + + if (self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set xlabel \"" << SEQ_label[SEQL_INDEX] << "\"" << endl; + out_file << "set ylabel \"" << STAT_label[STATL_FREQUENCY] << "\"" << endl; + + if (self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(max_frequency[j] * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << self_transition[j]->length - 1 + << "] [0:" << (int)(max_frequency[j] * YSCALE) + 1 << "] \"" + << label((data_file_name[j].str()).c_str()) + << "\" using 1:3 title \"" << SEQ_label[SEQL_TRANSITION_COUNTS] + << "\" with impulses" << endl; + + if (self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(max_frequency[j] * YSCALE) + 1 < 
TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + out_file << "set xlabel" << endl; + out_file << "set ylabel" << endl; + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a MarkovianSequences object for a variable + * in the case of the absence of the characteristic distributions. + * + * \param[in] plot reference on a MultiPlotSet object, + * \param[in] index MultiPlot index, + * \param[in] variable variable index. + */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::plotable_write(MultiPlotSet &plot , int &index , int variable) const + +{ + ostringstream legend; + + + /* nb_plot_set = 1; + if ((marginal_distribution[variable]) || (marginal_histogram[variable])) { + nb_plot_set++; + } + if (index_parameter_distribution) { + nb_plot_set++; + } */ + + plot.variable_nb_viewpoint[variable] = 1; + + if (marginal_distribution[variable]) { + + // marginal frequency distribution + + plot.variable[index] = variable; + + plot[index].xrange = Range(0 , MAX(marginal_distribution[variable]->nb_value - 1 , 1)); + plot[index].yrange = Range(0 , ceil(marginal_distribution[variable]->max * YSCALE)); + + if (marginal_distribution[variable]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(marginal_distribution[variable]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - " + << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + marginal_distribution[variable]->plotable_frequency_write(plot[index][0]); + } + + else if 
(marginal_histogram[variable]) { + + // marginal histogram + + plot[index].xrange = Range(marginal_histogram[variable]->min_value - marginal_histogram[variable]->bin_width , + marginal_histogram[variable]->max_value + marginal_histogram[variable]->bin_width); + plot[index].yrange = Range(0 , ceil(marginal_histogram[variable]->max * YSCALE)); + + if (ceil(marginal_histogram[variable]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " " + << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "histeps"; + + marginal_histogram[variable]->plotable_write(plot[index][0]); + } + + index++; + + // sequence length frequency distribution + + plot.variable[index] = variable; + + plot[index].xrange = Range(0 , length_distribution->nb_value - 1); + plot[index].yrange = Range(0 , ceil(length_distribution->max * YSCALE)); + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(length_distribution->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + length_distribution->plotable_frequency_write(plot[index][0]); + index++; + + if (index_parameter_distribution) { + + // index parameter frequency distribution + + plot.variable[index] = variable; + + plot[index].xrange = Range(index_parameter_distribution->offset , index_parameter_distribution->nb_value - 1); + plot[index].yrange = Range(0 , ceil(index_parameter_distribution->max * YSCALE)); + + if (index_parameter_distribution->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(index_parameter_distribution->max * YSCALE) < 
TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + index_parameter_distribution->plotable_frequency_write(plot[index][0]); + index++; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a MarkovianSequences object. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* MarkovianSequences::get_plotable() const + +{ + int i , j; + int nb_plot_set , index_length , index , max_frequency; + ostringstream title , legend; + MultiPlotSet *plot_set; + + + // computation of the number of plots + + nb_plot_set = 0; + + for (i = 0;i < nb_variable;i++) { + if (characteristics[i]) { + index_length = characteristics[i]->index_value->plot_length_computation(); + + nb_plot_set += 2; + if (characteristics[i]->index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + nb_plot_set++; + } + + nb_plot_set++; + for (j = 0;j < characteristics[i]->nb_value;j++) { + if (characteristics[i]->first_occurrence[j]->nb_element > 0) { + nb_plot_set++; + } + } + + nb_plot_set++; + for (j = 0;j < characteristics[i]->nb_value;j++) { + if (characteristics[i]->recurrence_time[j]->nb_element > 0) { + nb_plot_set++; + } + } + + nb_plot_set++; + for (j = 0;j < characteristics[i]->nb_value;j++) { + if (characteristics[i]->sojourn_time[j]->nb_element > 0) { + nb_plot_set++; + } + if ((characteristics[i]->initial_run) && + (characteristics[i]->initial_run[j]->nb_element > 0)) { + nb_plot_set++; + } + if (characteristics[i]->final_run[j]->nb_element > 0) { + nb_plot_set++; + } + } + + if ((characteristics[i]->nb_run) && (characteristics[i]->nb_occurrence)) { + nb_plot_set += 3; + for (j = 0;j < characteristics[i]->nb_value;j++) { + if ((characteristics[i]->nb_run[j]->nb_element > 
0) && + (characteristics[i]->nb_occurrence[j]->nb_element > 0)) { + nb_plot_set += 2; + } + } + } + } + + else if (type[i] != AUXILIARY) { + nb_plot_set++; + if ((marginal_distribution[i]) || (marginal_histogram[i])) { + nb_plot_set++; + } + if (index_parameter_distribution) { + nb_plot_set++; + } + } + } + + if (self_transition) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (self_transition[i]) { + nb_plot_set += 2; + } + } + } + + plot_set = new MultiPlotSet(nb_plot_set , nb_variable); + + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + for (i = 0;i < nb_variable;i++) { + plot.variable_nb_viewpoint[i] = 0; + } + + index = 0; + if (self_transition) { + plot.variable_nb_viewpoint[0]++; + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (self_transition[i]) { + + // self-transition probability as a function of the index parameter + + if (nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , self_transition[i]->length - 1); + plot[index].yrange = Range(0. 
, 1.); + + if (self_transition[i]->length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " - " + << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "linespoint"; + + self_transition[i]->plotable_write(0 , plot[index][0]); + index++; + + // frequency distributions of indexed transition counts + + if (nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , self_transition[i]->length - 1); + max_frequency = self_transition[i]->max_frequency_computation(); + plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (self_transition[i]->length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].xlabel = SEQ_label[SEQL_INDEX]; + plot[index].ylabel = STAT_label[STATL_FREQUENCY]; + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " - " + << SEQ_label[SEQL_TRANSITION_COUNTS]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + self_transition[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + } + + for (i = 0;i < nb_variable;i++) { + if (characteristics[i]) { + characteristics[i]->plotable_write(plot , index , i , type[i] , *length_distribution); + } + else if (type[i] != AUXILIARY) { + plotable_write(plot , index , i); + } + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a fitted observation linear trend model at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] variable variable index, + * \param[in] process pointer on a continuous observation process. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& MarkovianSequences::linear_model_spreadsheet_print(ostream &os , int variable , + ContinuousParametricProcess *process) const + +{ + bool *used_sequence; + int i , j , k , m , n , r; + int frequency , *index; + double buff; + + + if (type[variable] == INT_VALUE) { + used_sequence = new bool[nb_sequence]; + } + if (index_param_type == TIME) { + index = new int[nb_sequence]; + } + + os << "\n" << SEQ_label[SEQL_INDEX] << "\t" << STAT_label[STATL_OBSERVATION]; + for (i = 0;i < process->nb_state;i++) { + os << "\t" << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_LINEAR_MODEL]; + } + if (type[variable] == INT_VALUE) { + os << "\t" << STAT_label[STATL_FREQUENCY]; + } + os << endl; + + switch (type[0]) { + + case STATE : { + for (i = 0;i < process->nb_state;i++) { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + switch (type[variable]) { + + case INT_VALUE : { + for (j = 0;j < max_length;j++) { + for (k = 0;k < nb_sequence;k++) { + used_sequence[k] = false; + } + for (k = 0;k < nb_sequence;k++) { + if ((j < length[k]) && (int_sequence[k][0][j] == i) && (!used_sequence[k])) { + used_sequence[k] = true; + os << j << "\t" << int_sequence[k][variable][j]; + for (m = 0;m <= i;m++) { + os << "\t"; + } + os << process->observation[i]->intercept + process->observation[i]->slope * j; + + frequency = 1; + for (m = k + 1;m < nb_sequence;m++) { + if ((j < length[m]) && (int_sequence[m][0][j] == i) && + (int_sequence[m][variable][j] == int_sequence[k][variable][j])) { + used_sequence[m] = true; + frequency++; + } + } + for (m = i;m < process->nb_state;m++) { + os << "\t"; + } + os << frequency << endl; + } + } + } + break; + } + + case REAL_VALUE : { + for (j = 0;j < max_length;j++) { + for (k = 0;k < nb_sequence;k++) { + if ((j < length[k]) && (int_sequence[k][0][j] == i)) { + os << j << "\t" << real_sequence[k][variable][j]; + for (m = 0;m <= i;m++) { + os << "\t"; + } + 
os << process->observation[i]->intercept + process->observation[i]->slope * j << endl; + } + } + } + break; + } + } + break; + } + + case TIME : { + for (j = 0;j < nb_sequence;j++) { + index[j] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (j = index_parameter_distribution->offset;j < index_parameter_distribution->nb_value;j++) { + if (index_parameter_distribution->frequency[j] > 0) { + for (k = 0;k < nb_sequence;k++) { + used_sequence[k] = false; + } + for (k = 0;k < nb_sequence;k++) { + m = index[k]; + while ((m < length[k] - 1) && (index_parameter[k][m] < j)) { + m++; + } + + if ((index_parameter[k][m] == j) && (int_sequence[k][0][m] == i) && (!used_sequence[k])) { + index[k] = m; + used_sequence[k] = true; + os << j << "\t" << int_sequence[k][variable][m]; + for (n = 0;n <= i;n++) { + os << "\t"; + } + os << process->observation[i]->intercept + process->observation[i]->slope * j; + + frequency = 1; + for (n = k + 1;n < nb_sequence;n++) { + r = index[n]; + while ((r < length[n] - 1) && (index_parameter[n][r] < j)) { + r++; + } + + if ((index_parameter[n][r] == j) && (int_sequence[n][0][r] == i) && + (int_sequence[n][variable][r] == int_sequence[k][variable][m])) { + index[n] = r; + used_sequence[n] = true; + frequency++; + } + } + + for (n = i;n < process->nb_state;n++) { + os << "\t"; + } + os << frequency << endl; + } + } + } + } + break; + } + + case REAL_VALUE : { + for (j = index_parameter_distribution->offset;j < index_parameter_distribution->nb_value;j++) { + if (index_parameter_distribution->frequency[j] > 0) { + for (k = 0;k < nb_sequence;k++) { + m = index[k]; + while ((m < length[k] - 1) && (index_parameter[k][m] < j)) { + m++; + } + + if ((index_parameter[k][m] == j) && (int_sequence[k][0][m] == i)) { + index[k] = m; + os << j << "\t" << real_sequence[k][variable][m]; + for (n = 0;n <= i;n++) { + os << "\t"; + } + os << process->observation[i]->intercept + process->observation[i]->slope * j << endl; + } + } + } + } + break; + } 
+ } + break; + } + } + } + break; + } + + default : { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < max_length;i++) { + for (j = 0;j < nb_sequence;j++) { + used_sequence[j] = false; + } + for (j = 0;j < nb_sequence;j++) { + if ((i < length[j]) && (!used_sequence[j])) { + used_sequence[j] = true; + os << i << "\t" << int_sequence[j][variable][i]; + for (k = 0;k < process->nb_state;k++) { + os << "\t"; + buff = process->observation[k]->intercept + process->observation[k]->slope * i; + if ((buff >= min_value[variable]) && (buff <= max_value[variable])) { + os << buff; + } + } + + frequency = 1; + for (k = j + 1;k < nb_sequence;k++) { + if ((i < length[k]) && (int_sequence[k][variable][i] == int_sequence[j][variable][i])) { + used_sequence[k] = true; + frequency++; + } + } + os << "\t" << frequency << endl; + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < max_length;i++) { + for (j = 0;j < nb_sequence;j++) { + if (i < length[j]) { + os << i << "\t" << real_sequence[j][variable][i]; + for (k = 0;k < process->nb_state;k++) { + os << "\t"; + buff = process->observation[k]->intercept + process->observation[k]->slope * i; + if ((buff >= min_value[variable]) && ( buff <= max_value[variable])) { + os << buff; + } + } + os << endl; + } + } + } + break; + } + } + break; + } + + case TIME : { + for (i = 0;i < nb_sequence;i++) { + index[i] = 0; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = index_parameter_distribution->offset;i < index_parameter_distribution->nb_value;i++) { + if (index_parameter_distribution->frequency[i] > 0) { + for (j = 0;j < nb_sequence;j++) { + used_sequence[j] = false; + } + for (j = 0;j < nb_sequence;j++) { + k = index[j]; + while ((k < length[j] - 1) && (index_parameter[j][k] < i)) { + k++; + } + + if ((index_parameter[j][k] == i) && (!used_sequence[j])) { + index[j] = k; + used_sequence[j] = true; + os << i << "\t" << 
int_sequence[j][variable][k]; + for (m = 0;m < process->nb_state;m++) { + os << "\t"; + buff = process->observation[m]->intercept + process->observation[m]->slope * i; + if ((buff >= min_value[variable]) && (buff <= max_value[variable])) { + os << buff; + } + } + + frequency = 1; + for (m = j + 1;m < nb_sequence;m++) { + n = index[m]; + while ((n < length[m] - 1) && (index_parameter[m][n] < i)) { + n++; + } + + if ((index_parameter[m][n] == i) && (int_sequence[m][variable][n] == int_sequence[j][variable][k])) { + index[m] = n; + used_sequence[m] = true; + frequency++; + } + } + os << "\t" << frequency << endl; + } + } + } + } + break; + } + + case REAL_VALUE : { + for (i = index_parameter_distribution->offset;i < index_parameter_distribution->nb_value;i++) { + if (index_parameter_distribution->frequency[i] > 0) { + for (j = 0;j < nb_sequence;j++) { + k = index[j]; + while ((k < length[j] - 1) && (index_parameter[j][k] < i)) { + k++; + } + + if (index_parameter[j][k] == i) { + index[j] = k; + os << i << "\t" << real_sequence[j][variable][k]; + for (m = 0;m < process->nb_state;m++) { + os << "\t"; + buff = process->observation[m]->intercept + process->observation[m]->slope * i; + if ((buff >= min_value[variable]) && (buff <= max_value[variable])) { + os << buff; + } + } + os << endl; + } + } + } + } + break; + } + } + break; + } + } + break; + } + } + + if (type[variable] == INT_VALUE) { + delete [] used_sequence; + } + if (index_param_type == TIME) { + delete [] index; + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a fitted observation linear trend model using Gnuplot. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] variable variable index, + * \param[in] process pointer on a continuous observation process. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::linear_model_plot_print(const char *prefix , const char *title , int variable , + ContinuousParametricProcess *process) const + +{ + bool status = false; + int i , j; + int process_index , *state_min_index_parameter , *state_max_index_parameter , *pstate; + double buff , *state_min_value , *state_max_value; + ostringstream data_file_name[NB_STATE * 2 + 1]; + ofstream *out_data_file[NB_STATE + 1]; + + + // writing of data files + + state_min_index_parameter = new int[process->nb_state + 1]; + state_max_index_parameter = new int[process->nb_state + 1]; + + switch (type[0]) { + + case STATE : { + process_index = variable; + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < process->nb_state;i++) { + state_min_index_parameter[i] = max_length - 1; + state_max_index_parameter[i] = 0; + } + + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (j < state_min_index_parameter[*pstate]) { + state_min_index_parameter[*pstate] = j; + } + if (j > state_max_index_parameter[*pstate]) { + state_max_index_parameter[*pstate] = j; + } + pstate++; + } + } + + state_min_index_parameter[process->nb_state] = 0; + state_max_index_parameter[process->nb_state] = max_length - 1; + break; + } + + case TIME : { + for (i = 0;i < process->nb_state;i++) { + state_min_index_parameter[i] = index_parameter_distribution->nb_value - 1; + state_max_index_parameter[i] = index_parameter_distribution->offset; + } + + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (index_parameter[i][j] < state_min_index_parameter[*pstate]) { + state_min_index_parameter[*pstate] = index_parameter[i][j]; + } + if (index_parameter[i][j] > state_max_index_parameter[*pstate]) { + state_max_index_parameter[*pstate] = index_parameter[i][j]; + } + pstate++; + } + } + + 
state_min_index_parameter[process->nb_state] = index_parameter_distribution->offset; + state_max_index_parameter[process->nb_state] = index_parameter_distribution->nb_value - 1; + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_max_index_parameter[i] == state_min_index_parameter[i]) { + state_max_index_parameter[i]++; + } + + if (marginal_distribution[0]->frequency[i] == 0) { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + state_min_index_parameter[i] = 0; + state_max_index_parameter[i] = max_length - 1; + break; + } + + case TIME : { + state_min_index_parameter[i] = index_parameter_distribution->offset; + state_max_index_parameter[i] = index_parameter_distribution->nb_value - 1; + break; + } + } + + buff = (min_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope > 0.) && (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope < 0.) && (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + + buff = (max_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope < 0.) && (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope > 0.) 
&& (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + } + } + + state_min_value = new double[process->nb_state]; + state_max_value = new double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + state_min_value[i] = max_value[variable]; + state_max_value[i] = min_value[variable]; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (int_sequence[i][variable][j] < state_min_value[*pstate]) { + state_min_value[*pstate] = int_sequence[i][variable][j]; + } + if (int_sequence[i][variable][j] > state_max_value[*pstate]) { + state_max_value[*pstate] = int_sequence[i][variable][j]; + } + pstate++; + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (real_sequence[i][variable][j] < state_min_value[*pstate]) { + state_min_value[*pstate] = real_sequence[i][variable][j]; + } + if (real_sequence[i][variable][j] > state_max_value[*pstate]) { + state_max_value[*pstate] = real_sequence[i][variable][j]; + } + pstate++; + } + } + break; + } + } + break; + } + + default : { + process_index = variable + 1; + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i <= process->nb_state;i++) { + state_min_index_parameter[i] = 0; + state_max_index_parameter[i] = max_length - 1; + } + break; + } + + case TIME : { + for (i = 0;i <= process->nb_state;i++) { + state_min_index_parameter[i] = index_parameter_distribution->offset; + state_max_index_parameter[i] = index_parameter_distribution->nb_value - 1; + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + buff = (min_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope > 0.) 
&& (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope < 0.) && (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + + buff = (max_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope < 0.) && (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope > 0.) && (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + +// cout << "\n" << STAT_label[STATL_STATE] << " " << i << ": " << state_min_index_parameter[i] +// << ", " << state_max_index_parameter[i] << endl; + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + data_file_name[i * 2] << prefix << process_index << i * 2 << ".dat"; + out_data_file[0]= new ofstream((data_file_name[i * 2].str()).c_str()); + + if (out_data_file[0]) { + status = true; + + *out_data_file[0] << state_min_index_parameter[i] << " " + << process->observation[i]->intercept + process->observation[i]->slope * state_min_index_parameter[i] << endl; + *out_data_file[0] << state_max_index_parameter[i] << " " + << process->observation[i]->intercept + process->observation[i]->slope * state_max_index_parameter[i] << endl; + + out_data_file[0]->close(); + delete out_data_file[0]; + } + } + + if (type[0] == STATE) { + for (i = 0;i < process->nb_state;i++) { + if (marginal_distribution[0]->frequency[i] > 0) { + data_file_name[i * 2 + 1] << prefix << process_index << i * 2 + 1 << ".dat"; + out_data_file[i] = new ofstream ((data_file_name[i * 2 + 1].str()).c_str()); + } + } + switch (index_param_type) { + + case IMPLICIT_TYPE : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + *out_data_file[*pstate++] << j << " " << int_sequence[i][variable][j] << 
endl; + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + *out_data_file[*pstate++] << j << " " << real_sequence[i][variable][j] << endl; + } + } + break; + } + } + break; + } + + case TIME : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + *out_data_file[*pstate++] << index_parameter[i][j] << " " << int_sequence[i][variable][j] << endl; + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + *out_data_file[*pstate++] << index_parameter[i][j] << " " << real_sequence[i][variable][j] << endl; + } + } + break; + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (marginal_distribution[0]->frequency[i] > 0) { + out_data_file[i]->close(); + delete out_data_file[i]; + } + } + } + + data_file_name[process->nb_state * 2] << prefix << process_index << process->nb_state * 2 << ".dat"; + out_data_file[process->nb_state] = new ofstream ((data_file_name[process->nb_state * 2].str()).c_str()); + + if (out_data_file[process->nb_state]) { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + *out_data_file[process->nb_state] << j << " " << int_sequence[i][variable][j] << endl; + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + *out_data_file[process->nb_state] << j << " " << real_sequence[i][variable][j] << endl; + } + } + break; + } + } + break; + } + + case TIME : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + *out_data_file[process->nb_state] << index_parameter[i][j] << " " << 
int_sequence[i][variable][j] << endl; + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + *out_data_file[process->nb_state] << index_parameter[i][j] << " " << real_sequence[i][variable][j] << endl; + } + } + break; + } + } + break; + } + } + + out_data_file[process->nb_state]->close(); + delete out_data_file[process->nb_state]; + } + + if (status) { + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << process_index << ".plot"; + break; + case 1 : + file_name[0] << prefix << process_index << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << process << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title \""; + if (title) { + out_file << title << " - "; + } + out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << process_index << "\"\n\n"; + + out_file << "set xlabel \"" << SEQ_label[SEQL_INDEX] << "\"" << endl; + out_file << "set ylabel \"" << STAT_label[STATL_OBSERVATION] << "\"" << endl; + + if (type[0] == STATE) { + for (j = 0;j < process->nb_state;j++) { + if (marginal_distribution[0]->frequency[j] > 0) { + if (state_max_index_parameter[j] - state_min_index_parameter[j] < TIC_THRESHOLD) { + out_file << "set xtics " << state_min_index_parameter[j] << ",1" << endl; + } + if (state_max_value[j] - state_min_value[j] < TIC_THRESHOLD) { + out_file << "set ytics " << MIN(state_min_value[j] , 0) << ",1" << endl; + } + + out_file << "plot [" << state_min_index_parameter[j] << ":" << state_max_index_parameter[j] << "] ["; +/* if ((state_min_value[j] >= 0.) 
&& (state_max_value[j] - state_min_value[j] > state_min_value[j] * PLOT_RANGE_RATIO)) { + out_file << 0; + } + else { */ + out_file << state_min_value[j]; +// } + out_file << ":" << MAX(state_max_value[j] , state_min_value[j] + 1) << "] \"" + << label((data_file_name[2 * j + 1].str()).c_str()) << "\" using 1:2 notitle with points,\\" << endl; + out_file << "\"" << label((data_file_name[2 * j].str()).c_str()) << "\" using 1:2 title \"" + << STAT_label[STATL_STATE] << " " << j << " " << STAT_label[STATL_OBSERVATION] << " " + << STAT_label[STATL_MODEL] << "\" with lines" << endl; + + if (state_max_index_parameter[j] - state_min_index_parameter[j] < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if (state_max_value[j] - state_min_value[j] < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + } + } + + if (state_max_index_parameter[process->nb_state] - state_min_index_parameter[process->nb_state] < TIC_THRESHOLD) { + out_file << "set xtics " << state_min_index_parameter[process->nb_state] << ",1" << endl; + } + if (max_value[variable] - min_value[variable] < TIC_THRESHOLD) { + out_file << "set ytics " << MIN(min_value[variable] , 0) << ",1" << endl; + } + + out_file << "plot [" << state_min_index_parameter[process->nb_state] << ":" << state_max_index_parameter[process->nb_state] << "] ["; +/* if ((min_value[variable] >= 0.) 
&& (max_value[variable] - min_value[variable] > min_value[variable] * PLOT_RANGE_RATIO)) { + out_file << 0; + } + else { */ + out_file << min_value[variable]; +// } + out_file << ":" << MAX(max_value[variable] , min_value[variable] + 1) << "] \"" + << label((data_file_name[2 * process->nb_state].str()).c_str()) << "\" using 1:2 notitle with points,\\" << endl; + for (j = 0;j < process->nb_state;j++) { + out_file << "\"" << label((data_file_name[2 * j].str()).c_str()) << "\" using 1:2 title \"" + << STAT_label[STATL_STATE] << " " << j << " " << STAT_label[STATL_OBSERVATION] << " " + << STAT_label[STATL_MODEL] << "\" with lines"; + if (j < process->nb_state - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (state_max_index_parameter[process->nb_state] - state_min_index_parameter[process->nb_state] < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if (max_value[variable] - min_value[variable] < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + out_file << "set xlabel" << endl; + out_file << "set ylabel" << endl; + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + delete [] state_min_index_parameter; + delete [] state_max_index_parameter; + if (type[0] == STATE) { + delete [] state_min_value; + delete [] state_max_value; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a fitted observation linear trend model. + * + * \param[in] plot file prefix, + * \param[in] index MultiPlot index, + * \param[in] variable variable index, + * \param[in] process pointer on a continuous observation process. 
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::linear_model_plotable_write(MultiPlotSet &plot , int &index , int variable , + ContinuousParametricProcess *process) const + +{ + bool status = false; + int i , j; + int process_index , plot_offset , *state_min_index_parameter , *state_max_index_parameter , *pstate; + double buff , *state_min_value , *state_max_value; + ostringstream title , legend; + + + // computation of bounds + + state_min_index_parameter = new int[process->nb_state + 1]; + state_max_index_parameter = new int[process->nb_state + 1]; + + switch (type[0]) { + + case STATE : { + process_index = variable; + plot_offset = process->nb_state; + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i < process->nb_state;i++) { + state_min_index_parameter[i] = max_length - 1; + state_max_index_parameter[i] = 0; + } + + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (j < state_min_index_parameter[*pstate]) { + state_min_index_parameter[*pstate] = j; + } + if (j > state_max_index_parameter[*pstate]) { + state_max_index_parameter[*pstate] = j; + } + pstate++; + } + } + + state_min_index_parameter[process->nb_state] = 0; + state_max_index_parameter[process->nb_state] = max_length - 1; + break; + } + + case TIME : { + for (i = 0;i < process->nb_state;i++) { + state_min_index_parameter[i] = index_parameter_distribution->nb_value - 1; + state_max_index_parameter[i] = index_parameter_distribution->offset; + } + + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (index_parameter[i][j] < state_min_index_parameter[*pstate]) { + state_min_index_parameter[*pstate] = index_parameter[i][j]; + } + if (index_parameter[i][j] > state_max_index_parameter[*pstate]) { + state_max_index_parameter[*pstate] = index_parameter[i][j]; + } + pstate++; + } + } + + 
state_min_index_parameter[process->nb_state] = index_parameter_distribution->offset; + state_max_index_parameter[process->nb_state] = index_parameter_distribution->nb_value - 1; + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + if (state_max_index_parameter[i] == state_min_index_parameter[i]) { + state_max_index_parameter[i]++; + } + + if (marginal_distribution[0]->frequency[i] == 0) { + switch (index_param_type) { + + case IMPLICIT_TYPE : { + state_min_index_parameter[i] = 0; + state_max_index_parameter[i] = max_length - 1; + break; + } + + case TIME : { + state_min_index_parameter[i] = index_parameter_distribution->offset; + state_max_index_parameter[i] = index_parameter_distribution->nb_value - 1; + break; + } + } + + buff = (min_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope > 0.) && (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope < 0.) && (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + + buff = (max_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope < 0.) && (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope > 0.) 
&& (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + } + } + + state_min_value = new double[process->nb_state]; + state_max_value = new double[process->nb_state]; + + for (i = 0;i < process->nb_state;i++) { + state_min_value[i] = max_value[variable]; + state_max_value[i] = min_value[variable]; + } + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (int_sequence[i][variable][j] < state_min_value[*pstate]) { + state_min_value[*pstate] = int_sequence[i][variable][j]; + } + if (int_sequence[i][variable][j] > state_max_value[*pstate]) { + state_max_value[*pstate] = int_sequence[i][variable][j]; + } + pstate++; + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + + for (j = 0;j < length[i];j++) { + if (real_sequence[i][variable][j] < state_min_value[*pstate]) { + state_min_value[*pstate] = real_sequence[i][variable][j]; + } + if (real_sequence[i][variable][j] > state_max_value[*pstate]) { + state_max_value[*pstate] = real_sequence[i][variable][j]; + } + pstate++; + } + } + break; + } + } + break; + } + + default : { + process_index = variable + 1; + plot_offset = 0; + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + for (i = 0;i <= process->nb_state;i++) { + state_min_index_parameter[i] = 0; + state_max_index_parameter[i] = max_length - 1; + } + break; + } + + case TIME : { + for (i = 0;i <= process->nb_state;i++) { + state_min_index_parameter[i] = index_parameter_distribution->offset; + state_max_index_parameter[i] = index_parameter_distribution->nb_value - 1; + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + buff = (min_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope > 0.) 
&& (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope < 0.) && (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + + buff = (max_value[variable] - process->observation[i]->intercept) / process->observation[i]->slope; + if ((process->observation[i]->slope < 0.) && (buff > state_min_index_parameter[i])) { + state_min_index_parameter[i] = ceil(buff); + } + if ((process->observation[i]->slope > 0.) && (buff < state_max_index_parameter[i])) { + state_max_index_parameter[i] = floor(buff); + } + } + break; + } + } + + plot.variable_nb_viewpoint[variable] = 1; + + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << process_index; + + // linear function and observations for each state + + if (type[0] == STATE) { + for (i = 0;i < process->nb_state;i++) { + plot[index + i].title = title.str(); + + plot[index + i].xrange = Range(state_min_index_parameter[i] , state_max_index_parameter[i]); + plot[index + i].yrange = Range(state_min_value[i] , MAX(state_max_value[i] , state_min_value[i] + 1)); + + if (state_max_index_parameter[i] - state_min_index_parameter[i] < TIC_THRESHOLD) { + plot[index + i].xtics = 1; + } + if (state_max_value[i] - state_min_value[i] < TIC_THRESHOLD) { + plot[index + i].ytics = 1; + } + + plot[index + i].xlabel = SEQ_label[SEQL_INDEX]; + plot[index + i].ylabel = STAT_label[STATL_OBSERVATION]; + + plot[index + i].resize(2); + +/* legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION]; + plot[index + i][0].legend = legend.str(); */ + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] << " " + << STAT_label[STATL_MODEL]; + plot[index + i][1].legend = legend.str(); + + plot[index + i][0].style = "points"; + plot[index + i][1].style = "lines"; + } + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + switch 
(type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + plot[index + *pstate++][0].add_point(j , int_sequence[i][variable][j]); + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + plot[index + *pstate++][0].add_point(j , real_sequence[i][variable][j]); + } + } + break; + } + } + break; + } + + case TIME : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + plot[index + *pstate++][0].add_point(index_parameter[i][j] , int_sequence[i][variable][j]); + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + for (j = 0;j < length[i];j++) { + plot[index + *pstate++][0].add_point(index_parameter[i][j] , real_sequence[i][variable][j]); + } + } + break; + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + plot[index + i][1].add_point(state_min_index_parameter[i] , process->observation[i]->intercept + + process->observation[i]->slope * state_min_index_parameter[i]); + plot[index + i][1].add_point(state_max_index_parameter[i] , process->observation[i]->intercept + + process->observation[i]->slope * state_max_index_parameter[i]); + } + } + + // linear functions and pooled observations + + plot[index + plot_offset].title = title.str(); + + plot[index + plot_offset].xrange = Range(state_min_index_parameter[process->nb_state] , + state_max_index_parameter[process->nb_state]); + plot[index + plot_offset].yrange = Range(min_value[variable] , MAX(max_value[variable] , min_value[variable])); + + if (state_max_index_parameter[process->nb_state] - state_min_index_parameter[process->nb_state] < TIC_THRESHOLD) { + plot[index + plot_offset].xtics = 1; + } + if (max_value[variable] - min_value[variable] < TIC_THRESHOLD) { + 
plot[index + plot_offset].ytics = 1; + } + + plot[index + plot_offset].xlabel = SEQ_label[SEQL_INDEX]; + plot[index + plot_offset].ylabel = STAT_label[STATL_OBSERVATION]; + + plot[index + plot_offset].resize(process->nb_state + 1); + +/* legend.str(""); + legend << STAT_label[STATL_OBSERVATION]; + plot[index + plot_offset][0].legend = legend.str(); */ + + plot[index + plot_offset][0].style = "points"; + + for (i = 0;i < process->nb_state;i++) { + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << " " << STAT_label[STATL_OBSERVATION] << " " + << STAT_label[STATL_MODEL]; + plot[index + plot_offset][i + 1].legend = legend.str(); + + plot[index + plot_offset][i + 1].style = "lines"; + } + + switch (index_param_type) { + + case IMPLICIT_TYPE : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + plot[index + plot_offset][0].add_point(j , int_sequence[i][variable][j]); + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + plot[index + plot_offset][0].add_point(j , real_sequence[i][variable][j]); + } + } + break; + } + } + break; + } + + case TIME : { + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + plot[index + plot_offset][0].add_point(index_parameter[i][j] , int_sequence[i][variable][j]); + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + plot[index + plot_offset][0].add_point(index_parameter[i][j] , real_sequence[i][variable][j]); + } + } + break; + } + } + break; + } + } + + for (i = 0;i < process->nb_state;i++) { + plot[index + plot_offset][i + 1].add_point(state_min_index_parameter[i] , process->observation[i]->intercept + + process->observation[i]->slope * state_min_index_parameter[i]); + plot[index + plot_offset][i + 1].add_point(state_max_index_parameter[i] , 
process->observation[i]->intercept + + process->observation[i]->slope * state_max_index_parameter[i]); + } + + switch (type[0]) { + case STATE : + index += process->nb_state + 1; + break; + default : + index++; + break; + } + + delete [] state_min_index_parameter; + delete [] state_max_index_parameter; + if (type[0] == STATE) { + delete [] state_min_value; + delete [] state_max_value; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a fitted autocorrelation function for an observation autoregressive model. + * + * \param[in,out] os stream, + * \param[in] variable variable index, + * \param[in] process pointer on a continuous observation process, + * \param[in] file_flag flag file. + */ +/*--------------------------------------------------------------*/ + +ostream& MarkovianSequences::autoregressive_model_ascii_print(ostream &os , int variable , + ContinuousParametricProcess *process , + bool file_flag) const + +{ + int i , j; + int max_lag , width[5]; + double standard_normal_value , *confidence_limit; + Correlation *correl; + normal dist; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + max_lag = max_length * (1. 
- AUTOCORRELATION_FREQUENCY_RATIO); + correl = new Correlation(2 , max_lag + 1 , true , PEARSON); + confidence_limit = new double[max_lag + 1]; + standard_normal_value = quantile(complement(dist , 0.025)); + + os << "\n"; + for (i = 0;i < process->nb_state;i++) { + autocorrelation_computation(*correl , i , variable); + + if (correl->length > 0) { + correl->point[1][0] = 1.; + for (j = 1;j < correl->length;j++) { + correl->point[1][j] = correl->point[1][j - 1] * process->observation[i]->autoregressive_coeff; + } + for (j = 0;j < correl->length;j++) { + confidence_limit[j] = standard_normal_value / sqrt((double)(correl->frequency[j])); + } + + // computation of the column widths + + width[0] = column_width(correl->length - 1); + width[1] = column_width(correl->length , correl->point[0]) + ASCII_SPACE; + width[2] = column_width(correl->length , correl->point[1]) + ASCII_SPACE; + width[3] = column_width(correl->length , confidence_limit) + ASCII_SPACE; + width[4] = column_width(correl->frequency[0]) + ASCII_SPACE; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_LAG] << " | " << STAT_label[STATL_STATE] << " " << i << " " + << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION] + << " | " << STAT_label[STATL_STATE] << " " << i << " " + << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION] + << " | " << SEQ_label[SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT] + << " | " << STAT_label[STATL_FREQUENCY] << endl; + + for (j = 0;j < correl->length;j++) { + if (file_flag) { + os << "# "; + } + os << setw(width[0]) << j; + os << setw(width[1]) << correl->point[0][j]; + os << setw(width[2]) << correl->point[1][j]; + os << setw(width[3]) << confidence_limit[j]; + os << setw(width[4]) << correl->frequency[j] << endl; + } + os << endl; + } + + correl->length = max_lag + 1; + } + + delete correl; + delete [] confidence_limit; + + os.setf(format_flags , ios::adjustfield); + + return os; +} 
+ + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a fitted autocorrelation function for + * an observation autoregressive model at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] variable variable index, + * \param[in] process pointer on a continuous observation process. + */ +/*--------------------------------------------------------------*/ + +ostream& MarkovianSequences::autoregressive_model_spreadsheet_print(ostream &os , int variable , + ContinuousParametricProcess *process) const + +{ + int i , j; + int max_lag; + double standard_normal_value , confidence_limit; + Correlation *correl; + normal dist; + + + max_lag = max_length * (1. - AUTOCORRELATION_FREQUENCY_RATIO); + correl = new Correlation(2 , max_lag + 1 , true , PEARSON); + standard_normal_value = quantile(complement(dist , 0.025)); + + os << "\n"; + for (i = 0;i < process->nb_state;i++) { + autocorrelation_computation(*correl , i , variable); + + if (correl->length > 0) { + correl->point[1][0] = 1.; + for (j = 1;j < correl->length;j++) { + correl->point[1][j] = correl->point[1][j - 1] * process->observation[i]->autoregressive_coeff; + } + + os << SEQ_label[SEQL_LAG] << "\t" << STAT_label[STATL_STATE] << " " << i << " " + << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION] + << "\t" << STAT_label[STATL_STATE] << " " << i << " " + << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION] + << "\t" << SEQ_label[SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT] + << "\t" << SEQ_label[SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT] + << "\t" << STAT_label[STATL_FREQUENCY] << endl; + + for (j = 0;j < correl->length;j++) { + confidence_limit = standard_normal_value / sqrt((double)(correl->frequency[j])); + + os << j << "\t" << correl->point[0][j] << "\t" << correl->point[1][j] << "\t" + << confidence_limit << "\t" << -confidence_limit << "\t" << correl->frequency[j] << 
endl;
+      }
+      os << endl;
+    }
+
+    correl->length = max_lag + 1;
+  }
+
+  delete correl;
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Plot of a fitted autocorrelation function for
+ * an observation autoregressive model using Gnuplot.
+ *
+ * For each state, a data file <prefix><variable><state>.dat is written with one row per lag:
+ * lag, observed autocorrelation, theoretical autocorrelation, upper and lower
+ * confidence limits, frequency. Two Gnuplot scripts (<prefix><variable>.plot and
+ * <prefix><variable>.print) referring to these data files are then written.
+ *
+ * \param[in] prefix file prefix,
+ * \param[in] title figure title (may be null),
+ * \param[in] variable variable index,
+ * \param[in] process pointer on a continuous observation process.
+ *
+ * \return error status (true if at least one data file could be written).
+ */
+/*--------------------------------------------------------------*/
+
+bool MarkovianSequences::autoregressive_model_plot_print(const char *prefix , const char *title , int variable ,
+                                                         ContinuousParametricProcess *process) const
+
+{
+  bool status = false , start;
+  int i , j;
+  int max_lag;
+  double standard_normal_value , confidence_limit;
+  Correlation **correl;
+  normal dist;
+  ostringstream *data_file_name;
+
+
+  max_lag = max_length * (1. - AUTOCORRELATION_FREQUENCY_RATIO);
+  standard_normal_value = quantile(complement(dist , 0.025));
+  correl = new Correlation*[process->nb_state];
+
+  // one file name per state: sized on the actual number of states rather than
+  // on a fixed-size stack array
+
+  data_file_name = new ostringstream[process->nb_state];
+
+  for (i = 0;i < process->nb_state;i++) {
+    correl[i] = new Correlation(2 , max_lag + 1 , true , PEARSON);
+
+    autocorrelation_computation(*correl[i] , i , variable);
+
+    if (correl[i]->length > 0) {
+
+      // writing of data files
+
+      // theoretical autocorrelation function: geometric decay with the autoregressive coefficient
+
+      correl[i]->point[1][0] = 1.;
+      for (j = 1;j < correl[i]->length;j++) {
+        correl[i]->point[1][j] = correl[i]->point[1][j - 1] * process->observation[i]->autoregressive_coeff;
+      }
+
+      data_file_name[i] << prefix << variable << i << ".dat";
+
+      // automatic stream: closed (and flushed) at the end of the enclosing block
+
+      ofstream out_data_file((data_file_name[i].str()).c_str());
+
+      // test of the stream state (a pointer returned by new is never null)
+
+      if (out_data_file) {
+        status = true;
+
+        for (j = 0;j < correl[i]->length;j++) {
+          confidence_limit = standard_normal_value / sqrt((double)(correl[i]->frequency[j]));
+
+          out_data_file << j << " " << correl[i]->point[0][j] << " " << correl[i]->point[1][j] << " "
+                        << confidence_limit << " " << -confidence_limit << " " << correl[i]->frequency[j] << endl;
+        }
+      }
+    }
+  }
+
+  if (status) {
+
+    // writing of the script files
+
+    for (i = 0;i < 2;i++) {
+      ostringstream file_name[2];
+
+      switch (i) {
+      case 0 :
+        file_name[0] << prefix << variable << ".plot";
+        break;
+      case 1 :
+        file_name[0] << prefix << variable << ".print";
+        break;
+      }
+
+      ofstream out_file((file_name[0].str()).c_str());
+
+      if (i == 1) {
+        out_file << "set terminal postscript" << endl;
+
+        // the PostScript file is named after the variable index, like the script files
+        // (streaming the process pointer would insert a memory address in the name)
+
+        file_name[1] << label(prefix) << variable << ".ps";
+        out_file << "set output \"" << file_name[1].str() << "\"\n\n";
+      }
+
+      out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n"
+               << "set title \"";
+      if (title) {
+        out_file << title << " - ";
+      }
+      out_file << STAT_label[STATL_OUTPUT_PROCESS] << " " << variable << "\"\n\n";
+
+      // one plot per state with a non-empty autocorrelation function;
+      // successive plots are separated by a pause in the interactive script
+
+      start = true;
+      for (j = 0;j < process->nb_state;j++) {
+        if (correl[j]->length > 0) {
+          if (!start) {
+            if (i == 0) {
+              out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl;
+            }
+            out_file << endl;
+          }
+          else {
+            start = false;
+          }
+
+          out_file << "set xlabel \"" << SEQ_label[SEQL_LAG] << "\"" << endl;
+
+          if (correl[j]->length - 1 < TIC_THRESHOLD) {
+            out_file << "set xtics 0,1" << endl;
+          }
+
+          out_file << "plot [" << 0 << ":" << correl[j]->length - 1 << "] [-1:1] "
+                   << "\"" << label((data_file_name[j].str()).c_str()) << "\" using 1:2 title \""
+                   << STAT_label[STATL_STATE] << " " << j << " " << SEQ_label[SEQL_OBSERVED] << " "
+                   << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION]
+                   << "\" with linespoints,\\" << endl;
+          out_file << "\"" << label((data_file_name[j].str()).c_str()) << "\" using 1:3 title \""
+                   << STAT_label[STATL_STATE] << " " << j << " " << SEQ_label[SEQL_THEORETICAL] << " "
+                   << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION]
+                   << "\" with linespoints,\\" << endl;
+          out_file << "\"" << label((data_file_name[j].str()).c_str()) << "\" using 1:4 notitle with lines,\\" << endl;
+          out_file << "\"" << label((data_file_name[j].str()).c_str()) << "\" using 1:5 notitle with lines" << endl;
+
+          out_file << "set xlabel" << endl;
+
+          if (correl[j]->length - 1 < TIC_THRESHOLD) {
+            out_file << "set xtics autofreq" << endl;
+          }
+        }
+      }
+
+      out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl;
+    }
+  }
+
+  for (i = 0;i < process->nb_state;i++) {
+    delete correl[i];
+  }
+  delete [] correl;
+  delete [] data_file_name;
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Plot of a fitted autocorrelation function for an observation autoregressive model.
+ *
+ * \param[in] plot reference on a MultiPlotSet object,
+ * \param[in] index MultiPlot index,
+ * \param[in] variable variable index,
+ * \param[in] process pointer on a continuous observation process.
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::autoregressive_model_plotable_write(MultiPlotSet &plot , int &index , int variable , + ContinuousParametricProcess *process) const + +{ + bool status = false; + int i , j; + int max_lag; + double standard_normal_value , confidence_limit; + Correlation *correl; + normal dist; + ostringstream title , legend; + + + max_lag = max_length * (1. - AUTOCORRELATION_FREQUENCY_RATIO); + correl = new Correlation(2 , max_lag + 1 , true , PEARSON); + standard_normal_value = quantile(complement(dist , 0.025)); + + plot.variable_nb_viewpoint[variable] = 1; + + title.str(""); + title << STAT_label[STATL_OUTPUT_PROCESS] << " " << variable; + + for (i = 0;i < process->nb_state;i++) { + autocorrelation_computation(*correl , i , variable); + + if (correl->length > 0) { + correl->point[1][0] = 1.; + for (j = 1;j < correl->length;j++) { + correl->point[1][j] = correl->point[1][j - 1] * process->observation[i]->autoregressive_coeff; + } + + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , correl->length - 1); + plot[index].yrange = Range(-1 , 1); + + if (correl->length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].xlabel = SEQ_label[SEQL_LAG]; + plot[index].ylabel = STAT_label[STATL_CORRELATION_COEFF]; + + plot[index].resize(4); + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << SEQ_label[SEQL_OBSERVED] << " " + << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "linespoints"; + + legend.str(""); + legend << STAT_label[STATL_STATE] << " " << i << SEQ_label[SEQL_THEORETICAL] << " " + << SEQ_label[SEQL_AUTO] << SEQ_label[SEQL_CORRELATION_FUNCTION]; + plot[index][1].legend = legend.str(); + + plot[index][1].style = "linespoints"; + + plot[index][2].style = "lines"; + plot[index][3].style = "lines"; + + for (j = 0;j < correl->length;j++) { + 
confidence_limit = standard_normal_value / sqrt((double)(correl->frequency[j])); + + plot[index][0].add_point(j , correl->point[0][j]); + plot[index][1].add_point(j , correl->point[1][j]); + plot[index][2].add_point(j , confidence_limit); + plot[index][3].add_point(j , -confidence_limit); + } + + index++; + } + + correl->length = max_lag + 1; + } + + delete correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a MarkovianSequences object in a file at the MTG format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] itype variable types (NOMINAL/NUMERIC). + */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::mtg_write(StatError &error , const string path , variable_type *itype) const + +{ + bool status; + int i , j , k , m; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + // writing of the header + + out_file << "CODE:\tFORM-A" << endl; + + out_file << "\nCLASSES:\nSYMBOL\tSCALE\tDECOMPOSITION\tINDEXATION\tDEFINITION" << endl; + out_file << "$\t0\tFREE\tFREE\tIMPLICIT" << endl; + out_file << "U\t1\tFREE\tFREE\tIMPLICIT" << endl; + out_file << "E\t2\tFREE\tFREE\tIMPLICIT" << endl; + + for (i = 0;i < nb_variable;i++) { + switch (itype[i]) { + + case NOMINAL : { + for (j = 1;j < marginal_distribution[i]->nb_value;j++) { + out_file << (char)('F' + j) << "\t2\tFREE\tFREE\tIMPLICIT" << endl; + } + break; + } + + case NUMERIC : { + out_file << "F\t2\tFREE\tFREE\tIMPLICIT" << endl; + break; + } + } + } + + out_file << "\nDESCRIPTION:\nLEFT\tRIGHT\tRELTYPE\tMAX" << endl; + out_file << "E\tE\t<\t1" << endl; + + for (i = 0;i < nb_variable;i++) { + switch (itype[i]) { + + case NOMINAL : { + out_file << "E\t"; + for (j = 1;j < marginal_distribution[i]->nb_value;j++) { + out_file << (char)('F' + j); + 
if (j < marginal_distribution[i]->nb_value - 1) { + out_file << ","; + } + } + out_file << "\t+\t1" << endl; + break; + } + + case NUMERIC : { + out_file << "E\tF\t+\t?" << endl; + break; + } + } + } + + out_file << "\nFEATURES:\nNAME\tTYPE" << endl; + + // writing of the topological code + + out_file << "\nMTG:\nENTITY-CODE\n" << endl; + + for (i = 0;i < nb_sequence;i++) { + out_file << "/U" << i + 1 << endl; + + for (j = 0;j < length[i];j++) { + if (j == 0) { + out_file << "\t/"; + } + else { + out_file << "\t^<"; + } + out_file << 'E' << j + 1 << endl; + + for (k = 0;k < nb_variable;k++) { + switch (itype[k]) { + + case NOMINAL : { + if (int_sequence[i][k][j] > 0) { + out_file <<"\t\t+" << (char)('F' + int_sequence[i][k][j]) << 1 << endl; + } + break; + } + + case NUMERIC : { + for (m = 0;m < int_sequence[i][k][j];m++) { + out_file <<"\t\t+F" << m + 1 << endl; + } + break; + } + } + } + } + } + } + + return status; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/nhmc_algorithms.cpp b/src/cpp/sequence_analysis/nhmc_algorithms.cpp new file mode 100644 index 0000000..1d71861 --- /dev/null +++ b/src/cpp/sequence_analysis/nhmc_algorithms.cpp @@ -0,0 +1,1567 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include "stat_tool/stat_label.h" + +#include "nonhomogeneous_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Update of the transition distribution of a state for a nonhomogeneous Markov chain. + * + * \param[in] state state, + * \param[in] index sequence index, + * \param[in] index_chain reference on the transition probabilities. + */ +/*--------------------------------------------------------------*/ + +void NonhomogeneousMarkov::transition_update(int state , int index , Chain &index_chain) const + + +{ + int i; + double scale , *pparam; + + + pparam = self_transition[state]->parameter; + + // update of the self-transition probability + + switch (self_transition[state]->ident) { + case LOGISTIC : + index_chain.transition[state][state] = pparam[0] / (1. + pparam[1] * exp(-pparam[2] * index)); + break; + case MONOMOLECULAR : + index_chain.transition[state][state] = pparam[0] + pparam[1] * exp(-pparam[2] * index); + break; + } + + // update of the state change probabilities + + scale = (1. - index_chain.transition[state][state]) / (1. 
- transition[state][state]); + for (i = 0;i < nb_state;i++) { + if (i != state) { + index_chain.transition[state][i] = scale * transition[state][i]; + } + } + + if (index_chain.cumul_transition) { + stat_tool::cumul_computation(nb_state , index_chain.transition[state] , + index_chain.cumul_transition[state]); + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state probabilities as a function of + * the index parameter for a nonhomogeneous Markov chain. + */ +/*--------------------------------------------------------------*/ + +void NonhomogeneousMarkov::index_state_distribution() + +{ + int i , j , k; + Curves *index_state; + Chain *index_chain; + + + index_state = process->index_value; + + // initialization of the transition probability matrix + + index_chain = new Chain(*this); + + // initialization of the state probabilities + + for (i = 0;i < nb_state;i++) { + index_state->point[i][0] = initial[i]; + } + + for (i = 1;i < index_state->length;i++) { + + // change in transition probabilities with the index parameter + + for (j = 0;j < nb_state;j++) { + if (!homogeneity[j]) { + transition_update(j , i - 1 , *index_chain); + } + } + + // computation of the state probabilities + + for (j = 0;j < nb_state;j++) { + index_state->point[j][i] = 0.; + for (k = 0;k < nb_state;k++) { + index_state->point[j][i] += index_chain->transition[k][j] * index_state->point[k][i - 1]; + } + } + } + + delete index_chain; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probability of not visiting a state + * for a nonhomogeneous Markov chain. + * + * \param[in] state state, + * \param[in] increment threshold on the sum of the state probabilities. 
+ */ +/*--------------------------------------------------------------*/ + +void NonhomogeneousMarkov::state_no_occurrence_probability(int state , double increment) + +{ + int i; + + for (i = 0;i < nb_state;i++) { + if ((i != state) && (!accessibility[i][state])) { + break; + } + } + + if (i < nb_state) { + int j , k; + double state_sum , *current_state , *previous_state , + &no_occurrence = process->no_occurrence[state]; + Chain *index_chain; + + + // initialization of the transition probability matrix + + index_chain = new Chain(*this); + + // initialization of the state probabilities + + current_state = new double[nb_state]; + previous_state = new double[nb_state]; + + state_sum = 0.; + no_occurrence = 0.; + + for (i = 0;i < nb_state;i++) { + if (i != state) { + if (accessibility[i][state]) { + current_state[i] = initial[i]; + state_sum += current_state[i]; + } + else { + current_state[i] = 0.; + no_occurrence += initial[i]; + } + } + } + + i = 1; + + while ((state_sum > increment) || (i < nb_state - 1)) { + + // change in transition probabilities with the index parameter + + for (j = 0;j < nb_state;j++) { + if (!homogeneity[j]) { + transition_update(j , i - 1 , *index_chain); + } + } + + // update of the state probabilities + + for (j = 0;j < nb_state;j++) { + previous_state[j] = current_state[j]; + } + + // computation of the state probabilities and update of + // the probability of not visiting the selected state + + state_sum = 0.; + + for (j = 0;j < nb_state;j++) { + if (j != state) { + if (accessibility[j][state]) { + current_state[j] = 0.; + + for (k = 0;k < nb_state;k++) { + if (k != state) { + current_state[j] += index_chain->transition[k][j] * previous_state[k]; + } + } + + state_sum += current_state[j]; + } + + else { + for (k = 0;k < nb_state;k++) { + if (k != state) { + no_occurrence += index_chain->transition[k][j] * previous_state[k]; + } + } + } + } + } + + i++; + } + + delete index_chain; + delete [] current_state; + delete [] previous_state; + 
} +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the time to the 1st occurrence of a state + * for a nonhomogeneous Markov chain. + * + * \param[in] state state, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. + */ +/*--------------------------------------------------------------*/ + +void NonhomogeneousMarkov::state_first_occurrence_distribution(int state , int min_nb_value , + double cumul_threshold) + +{ + int i , j , k; + double *current_state , *previous_state , *pmass , *pcumul; + Chain *index_chain; + Distribution *first_occurrence; + + + first_occurrence = process->first_occurrence[state]; + first_occurrence->complement = process->no_occurrence[state]; + + pmass = first_occurrence->mass; + pcumul = first_occurrence->cumul; + + // initialization of the transition probability matrix + + index_chain = new Chain(*this); + + // initialization of the state probabilities + + current_state = new double[nb_state]; + previous_state = new double[nb_state]; + + for (i = 0;i < nb_state;i++) { + if (i != state) { + current_state[i] = initial[i]; + } + else { + *pmass = initial[i]; + } + } + *pcumul = *pmass; + + i = 1; + + while (((*pcumul < cumul_threshold - first_occurrence->complement) || (i < min_nb_value)) && + (i < first_occurrence->alloc_nb_value)) { + + // change in transition probabilities with the index parameter + + for (j = 0;j < nb_state;j++) { + if (!homogeneity[j]) { + transition_update(j , i - 1 , *index_chain); + } + } + + // update of the state probabilities + + for (j = 0;j < nb_state;j++) { + previous_state[j] = current_state[j]; + } + + // computation of the state probabilities and the current probabilty mass + + *++pmass = 0.; + + for (j = 0;j < nb_state;j++) { + if (j != state) { + current_state[j] = 0.; + + for (k = 0;k < nb_state;k++) { + if (k != state) { + current_state[j] += 
index_chain->transition[k][j] * previous_state[k]; + } + } + } + + else { + for (k = 0;k < nb_state;k++) { + if (k != state) { + *pmass += index_chain->transition[k][j] * previous_state[k]; + } + } + } + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + + first_occurrence->nb_value = i; + +# ifdef DEBUG + if (first_occurrence->complement > 0.) { + cout << "\n" << SEQ_label[SEQL_NO_OCCURRENCE] << " " << state << " : " + << first_occurrence->complement << " | " + << 1. - first_occurrence->cumul[first_occurrence->nb_value - 1] << endl; + } +# endif + + first_occurrence->offset_computation(); + first_occurrence->max_computation(); + first_occurrence->mean_computation(); + first_occurrence->variance_computation(); + + delete index_chain; + delete [] current_state; + delete [] previous_state; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the mixture of the distributions of the number of runs (RUN) or + * occurrences (OCCURRENCE) of a state for a sequence length mixing distribution and + * a nonhomogeneous Markov chain. + * + * \param[in] state state, + * \param[in] pattern count pattern type. 
+ */
+/*--------------------------------------------------------------*/
+
+void NonhomogeneousMarkov::state_nb_pattern_mixture(int state , count_pattern pattern)
+
+{
+  int i , j , k , m;
+  int max_length , nb_pattern , index_nb_pattern , increment;
+  double sum , **current_state , **previous_state , *cstate , *pstate , *pmass , *lmass;
+  Distribution *pdist;
+  Chain *index_chain;
+
+
+  max_length = process->length->nb_value - 1;
+
+  // selection of the target distribution and of the number of pattern counts
+  // allocated for each state
+  // NOTE(review): pdist and nb_pattern are left unset if pattern is neither RUN nor OCCURRENCE
+
+  switch (pattern) {
+  case RUN :
+    pdist = process->nb_run[state];
+    nb_pattern = max_length / 2 + 2;
+    break;
+  case OCCURRENCE :
+    pdist = process->nb_occurrence[state];
+    nb_pattern = max_length + 1;
+    break;
+  }
+
+  pmass = pdist->mass;
+  for (i = 0;i < pdist->nb_value;i++) {
+    *pmass++ = 0.;
+  }
+
+  // initialization of the transition probability matrix
+
+  index_chain = new Chain(*this);
+
+  // current_state[j][k]: probability of being in state j with k runs or occurrences
+  // of the selected state at the current index
+
+  current_state = new double*[nb_state];
+  for (i = 0;i < nb_state;i++) {
+    current_state[i] = new double[nb_pattern];
+  }
+
+  previous_state = new double*[nb_state];
+  for (i = 0;i < nb_state;i++) {
+    previous_state[i] = new double[nb_pattern];
+  }
+
+  lmass = process->length->mass;
+  index_nb_pattern = 1;
+
+  for (i = 0;i < max_length;i++) {
+
+    // initialization of the state probabilities for a number of runs or occurrences of
+    // the selected state
+
+    if (i == 0) {
+      for (j = 0;j < nb_state;j++) {
+        if (j == state) {
+          current_state[j][0] = 0.;
+          current_state[j][1] = initial[j];
+        }
+        else {
+          current_state[j][0] = initial[j];
+          current_state[j][1] = 0.;
+        }
+      }
+    }
+
+    else {
+
+      // change in transition probabilities with the index parameter
+
+      for (j = 0;j < nb_state;j++) {
+        if (!homogeneity[j]) {
+          transition_update(j , i - 1 , *index_chain);
+        }
+      }
+
+      // update of the state probabilities
+
+      for (j = 0;j < nb_state;j++) {
+        for (k = 0;k < index_nb_pattern;k++) {
+          previous_state[j][k] = current_state[j][k];
+          current_state[j][k] = 0.;
+        }
+        current_state[j][index_nb_pattern] = 0.;
+      }
+
+      for (j = 0;j < nb_state;j++) {
+
+        // computation of the state probabilities for each number of runs or occurrences of
+        // the selected state
+
+        for (k = 0;k < nb_state;k++) {
+
+          // a run is counted when the selected state is entered from another state,
+          // an occurrence each time the selected state is visited
+
+          switch (pattern) {
+          case RUN :
+            increment = (((k != state) && (j == state)) ? 1 : 0);
+            break;
+          case OCCURRENCE :
+            increment = (j == state ? 1 : 0);
+            break;
+          }
+
+          cstate = current_state[j];
+          pstate = previous_state[k];
+
+          // shift of the destination by one count when the transition adds a run or an occurrence
+
+          if (increment == 1) {
+            cstate++;
+          }
+          for (m = 0;m < index_nb_pattern;m++) {
+            *cstate++ += index_chain->transition[k][j] * *pstate++;
+          }
+        }
+      }
+    }
+
+    // number of counts in use: incremented at each index for occurrences,
+    // at every other index for runs
+
+    if ((pattern == OCCURRENCE) || (i % 2 == 0)) {
+      index_nb_pattern++;
+    }
+
+    // update of the mixture of the distributions of the number of runs or
+    // occurrences of the selected state
+
+    if (*++lmass > 0.) {
+      pmass = pdist->mass;
+      for (j = 0;j < index_nb_pattern;j++) {
+        sum = 0.;
+        for (k = 0;k < nb_state;k++) {
+          sum += current_state[k][j];
+        }
+        *pmass++ += *lmass * sum;
+      }
+    }
+  }
+
+  pdist->nb_value_computation();
+  pdist->offset_computation();
+  pdist->cumul_computation();
+
+  pdist->max_computation();
+  pdist->mean_computation();
+  pdist->variance_computation();
+
+  delete index_chain;
+
+  for (i = 0;i < nb_state;i++) {
+    delete [] current_state[i];
+  }
+  delete [] current_state;
+
+  for (i = 0;i < nb_state;i++) {
+    delete [] previous_state[i];
+  }
+  delete [] previous_state;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the characteristic distributions of a NonhomogeneousMarkov object.
+ *
+ * \param[in] length sequence length,
+ * \param[in] counting_flag flag on the computation of the counting distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void NonhomogeneousMarkov::characteristic_computation(int length , bool counting_flag)
+
+{
+  // no computation unless the number of components is strictly positive
+
+  if (!(nb_component > 0)) {
+    return;
+  }
+
+  // degenerate sequence length distribution concentrated on the requested length
+
+  DiscreteParametric dlength(UNIFORM , length , length , D_DEFAULT , D_DEFAULT);
+
+  // the characteristic distributions already computed for this length are kept
+
+  if ((process->length) && (!(dlength != *(process->length)))) {
+    return;
+  }
+
+  process->create_characteristic(dlength , homogeneity , counting_flag);
+
+  index_state_distribution();
+
+  for (int state = 0;state < nb_state;state++) {
+    state_no_occurrence_probability(state);
+    state_first_occurrence_distribution(state);
+
+    // sojourn time distribution of a homogeneous state
+
+    if (homogeneity[state]) {
+      if (stype[state] == ABSORBING) {
+        process->absorption[state] = 1.;
+        delete process->sojourn_time[state];
+        process->sojourn_time[state] = NULL;
+      }
+
+      else {
+        process->sojourn_time[state]->init(NEGATIVE_BINOMIAL , 1 , I_DEFAULT , 1. ,
+                                           1. - transition[state][state]);
+        process->sojourn_time[state]->computation(1 , OCCUPANCY_THRESHOLD);
+        process->sojourn_time[state]->ident = CATEGORICAL;
+      }
+    }
+
+    // counting distributions (number of runs and number of occurrences)
+
+    if (counting_flag) {
+      state_nb_pattern_mixture(state , RUN);
+      state_nb_pattern_mixture(state , OCCURRENCE);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the characteristic distributions of a NonhomogeneousMarkov object.
+ *
+ * \param[in] seq reference on a NonhomogeneousMarkovData object,
+ * \param[in] counting_flag flag on the computation of the counting distributions,
+ * \param[in] length_flag flag on the sequence length.
+ */
+/*--------------------------------------------------------------*/
+
+void NonhomogeneousMarkov::characteristic_computation(const NonhomogeneousMarkovData &seq ,
+                                                      bool counting_flag , bool length_flag)
+
+{
+  // no computation unless the number of components is strictly positive
+
+  if (!(nb_component > 0)) {
+    return;
+  }
+
+  // sequence length distribution extracted from the data
+
+  Distribution dlength(*(seq.length_distribution));
+
+  // when the sequence length is taken into account, the characteristic distributions
+  // already computed for the same length distribution are kept
+
+  if ((length_flag) && (process->length) && (!(dlength != *(process->length)))) {
+    return;
+  }
+
+  process->create_characteristic(dlength , homogeneity , counting_flag);
+
+  index_state_distribution();
+
+  for (int state = 0;state < nb_state;state++) {
+    state_no_occurrence_probability(state);
+    state_first_occurrence_distribution(state , seq.characteristics[0]->first_occurrence[state]->nb_value);
+
+    // sojourn time distribution of a homogeneous state
+
+    if (homogeneity[state]) {
+      if (stype[state] == ABSORBING) {
+        process->absorption[state] = 1.;
+        delete process->sojourn_time[state];
+        process->sojourn_time[state] = NULL;
+      }
+
+      else {
+        process->sojourn_time[state]->init(NEGATIVE_BINOMIAL , 1 , I_DEFAULT , 1. ,
+                                           1. - transition[state][state]);
+        process->sojourn_time[state]->computation(seq.characteristics[0]->sojourn_time[state]->nb_value ,
+                                                  OCCUPANCY_THRESHOLD);
+        process->sojourn_time[state]->ident = CATEGORICAL;
+      }
+    }
+
+    // counting distributions (number of runs and number of occurrences)
+
+    if (counting_flag) {
+      state_nb_pattern_mixture(state , RUN);
+      state_nb_pattern_mixture(state , OCCURRENCE);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the variation explained by the self-transition probability function.
+ *
+ * \param[in] mean mean.
+ *
+ * \return regression square sum.
+ */
+/*--------------------------------------------------------------*/
+
+double Function::regression_square_sum_computation(double mean) const
+
+{
+  double square_sum = 0.;
+
+
+  // frequency-weighted sum of the squared deviations of the function values from the mean
+
+  for (int value = 0;value <= max_value;value++) {
+    if (frequency[value] > 0) {
+      const double deviation = point[value] - mean;
+      square_sum += frequency[value] * deviation * deviation;
+    }
+  }
+
+  return square_sum;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the residuals of a self-transition probability function.
+ *
+ * The frequencies of the self-transition probability function are copied;
+ * the residual is set to -D_INF for the index values without observation.
+ *
+ * \param[in] self_transition reference on the self-transition probability function for a state.
+ */
+/*--------------------------------------------------------------*/
+
+void Function::residual_computation(const SelfTransition &self_transition)
+
+{
+  const int *observed_frequency = self_transition.frequency;
+  const double *observed_point = self_transition.point[0];
+
+
+  for (int value = 0;value <= max_value;value++) {
+    frequency[value] = observed_frequency[value];
+
+    if (frequency[value] > 0) {
+      residual[value] = observed_point[value] - point[value];
+    }
+    else {
+      residual[value] = -D_INF;
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the mean of the residuals of a self-transition probability function.
+ *
+ * \return residual mean.
+ */
+/*--------------------------------------------------------------*/
+
+double Function::residual_mean_computation() const
+
+{
+  int total_frequency = 0;
+  double weighted_sum = 0.;
+
+
+  // frequency-weighted sum of the residuals over the index values with observations
+
+  for (int value = 0;value <= max_value;value++) {
+    if (frequency[value] > 0) {
+      total_frequency += frequency[value];
+      weighted_sum += frequency[value] * residual[value];
+    }
+  }
+
+  return weighted_sum / total_frequency;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the variance of the residuals of a self-transition probability function.
+ *
+ * \param[in] residual_mean residual mean.
+ *
+ * \return residual variance (D_DEFAULT if the residual degrees of freedom are not strictly positive).
+ */
+/*--------------------------------------------------------------*/
+
+double Function::residual_variance_computation(double residual_mean) const
+
+{
+  // the variance is not defined without residual degrees of freedom
+
+  if (!(residual_df > 0.)) {
+    return D_DEFAULT;
+  }
+
+  double square_sum = 0.;
+
+  for (int value = 0;value <= max_value;value++) {
+    if (frequency[value] > 0) {
+      const double deviation = residual[value] - residual_mean;
+      square_sum += frequency[value] * deviation * deviation;
+    }
+  }
+
+  return square_sum / residual_df;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the sum of squared residuals of a self-transition probability function.
+ *
+ * \return residual square sum.
+ */
+/*--------------------------------------------------------------*/
+
+double Function::residual_square_sum_computation() const
+
+{
+  int i;
+  int *pfrequency;
+  double residual_square_sum , *presidual;
+
+
+  pfrequency = frequency;
+  presidual = residual;
+  residual_square_sum = 0.;
+
+  // frequency-weighted sum of the squared residuals over the index values with observations
+
+  for (i = 0;i <= max_value;i++) {
+    if (*pfrequency > 0) {
+      residual_square_sum += *pfrequency * *presidual * *presidual;
+    }
+    pfrequency++;
+    presidual++;
+  }
+
+  return residual_square_sum;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Estimation of the parameters of the logistic function y = a / (1 + b * exp(-c * x)).
+ *
+ * The parameters are initialized from the observed self-transition probabilities
+ * and then refined by iterative gradient steps on the weighted sum of squared residuals.
+ *
+ * \return pointer on the fitted Function object (allocated with new in this function).
+ */
+/*--------------------------------------------------------------*/
+
+Function* SelfTransition::logistic_regression() const
+
+{
+  int i;
+  int iter , nb_element = nb_element_computation() , norm , init_nb_element , *pfrequency;
+  double start_proba , denom , residual , residual_square_sum = -D_INF , previous_residual_square_sum ,
+         correction[3] , *ppoint;
+  Function *function;
+
+
+  function = new Function(LOGISTIC , length);
+
+  function->regression_df = function->nb_parameter;
+  function->residual_df = nb_element - function->nb_parameter;
+
+  // parameter initialization
+
+  // start_proba: frequency-weighted mean of the first observed probabilities
+  // NOTE(review): the scan is not bounded by length - presumably the caller guarantees
+  // that the total frequency reaches init_nb_element; confirm
+
+  init_nb_element = (int)(START_RATIO * nb_element);
+  init_nb_element = MAX(init_nb_element , REGRESSION_NB_ELEMENT / 4);
+
+  pfrequency = frequency;
+  ppoint = point[0];
+  start_proba = 0.;
+  i = 0;
+
+  do {
+    start_proba += *pfrequency * *ppoint++;
+    i += *pfrequency++;
+  }
+  while (i < init_nb_element);
+  start_proba /= i;
+
+  // parameter a: frequency-weighted mean of the last observed probabilities (backward scan)
+
+  init_nb_element = (int)(END_RATIO * nb_element);
+  init_nb_element = MAX(init_nb_element , REGRESSION_NB_ELEMENT / 4);
+
+  pfrequency = frequency + length;
+  ppoint = point[0] + length;
+  function->parameter[0] = 0.;
+  i = 0;
+
+  do {
+    function->parameter[0] += *--pfrequency * *--ppoint;
+    i += *pfrequency;
+  }
+  while (i < init_nb_element);
+  function->parameter[0] /= i;
+
+  // parameter b: such that the function is equal to start_proba at x = 0
+
+  function->parameter[1] = function->parameter[0] / start_proba - 1.;
+
+  // parameter c: frequency-weighted mean of the values obtained by inverting
+  // the function at each index value where the logarithm is defined
+  // NOTE(review): norm is equal to 0 if no index value qualifies - the division
+  // below then yields a non-finite value
+
+  pfrequency = frequency + 1;
+  ppoint = point[0] + 1;
+  function->parameter[2] = 0.;
+  norm = 0;
+  for (i = 1;i < length;i++) {
+    if ((*pfrequency > 0) && ((function->parameter[0] / *ppoint - 1.) / function->parameter[1] > 0.)) {
+      function->parameter[2] -= *pfrequency * log((function->parameter[0] / *ppoint - 1.) / function->parameter[1]) / i;
+      norm += *pfrequency;
+    }
+    pfrequency++;
+    ppoint++;
+  }
+  function->parameter[2] /= norm;
+
+# ifdef DEBUG
+  cout << "\n";
+  function->ascii_parameter_print(cout);
+  cout << endl;
+# endif
+
+  // least-square iterations
+
+  iter = 0;
+  do {
+    iter++;
+    previous_residual_square_sum = residual_square_sum;
+
+    pfrequency = frequency;
+    ppoint = point[0];
+    residual_square_sum = 0.;
+
+    for (i = 0;i < function->nb_parameter;i++) {
+      correction[i] = 0.;
+    }
+
+    // accumulation of the residual square sum and, for each parameter, of the residuals
+    // weighted by the partial derivative of the function
+
+    for (i = 0;i < length;i++) {
+      if (*pfrequency > 0) {
+        denom = 1. + function->parameter[1] * exp(-function->parameter[2] * i);
+        residual = *ppoint - function->parameter[0] / denom;
+        residual_square_sum += *pfrequency * residual * residual;
+        correction[0] += *pfrequency * residual / denom;
+        correction[1] -= *pfrequency * residual * function->parameter[0] * exp(-function->parameter[2] * i) /
+                         (denom * denom);
+        if (i > 0) {
+          correction[2] += *pfrequency * residual * function->parameter[0] * function->parameter[1] * i *
+                           exp(-function->parameter[2] * i) / (denom * denom);
+        }
+      }
+      pfrequency++;
+      ppoint++;
+    }
+    residual_square_sum /= nb_element;
+
+    // NOTE(review): the last divisor is equal to 0 if all the observations are at index 0
+
+    function->parameter[0] += GRADIENT_DESCENT_COEFF * correction[0] / nb_element;
+    function->parameter[1] += GRADIENT_DESCENT_COEFF * correction[1] / nb_element;
+    function->parameter[2] += GRADIENT_DESCENT_COEFF * correction[2] / (nb_element - frequency[0]);
+
+    // application of thresholds on parameters
+
+    // a (limit value) and a / (1 + b) (value at x = 0) are kept within
+    // [MIN_PROBABILITY, 1 - MIN_PROBABILITY]
+
+    if (function->parameter[0] < MIN_PROBABILITY) {
+      function->parameter[0] = MIN_PROBABILITY;
+    }
+    if (function->parameter[0] > 1. - MIN_PROBABILITY) {
+      function->parameter[0] = 1. - MIN_PROBABILITY;
+    }
+    if (function->parameter[0] / (1. + function->parameter[1]) < MIN_PROBABILITY) {
+      function->parameter[1] = function->parameter[0] / MIN_PROBABILITY - 1.;
+    }
+    if (function->parameter[0] / (1. + function->parameter[1]) > 1. - MIN_PROBABILITY) {
+      function->parameter[1] = function->parameter[0] / (1. - MIN_PROBABILITY) - 1.;
+    }
+
+# ifdef DEBUG
+    if ((iter < 10) || (iter % 10 == 0)) {
+      function->ascii_parameter_print(cout);
+      cout << "\niteration " << iter << ", " << residual_square_sum << " | "
+           << (previous_residual_square_sum - residual_square_sum) / residual_square_sum << endl;
+    }
+# endif
+
+  }
+
+  // stop when the relative decrease of the residual square sum is no longer above
+  // the threshold or when the maximum number of iterations is reached
+
+  while (((previous_residual_square_sum - residual_square_sum) / residual_square_sum > RESIDUAL_SQUARE_SUM_DIFF) &&
+         (iter < REGRESSION_NB_ITER));
+
+  // computation of the logistic function and the residuals
+
+  function->computation();
+  function->residual_computation(*this);
+
+  return function;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Estimation of the parameters of the monomolecular function y = a + b * exp(-c * x).
+ */
+/*--------------------------------------------------------------*/
+
+Function* SelfTransition::monomolecular_regression() const
+
+{
+  int i;
+  int iter , nb_element = nb_element_computation() , norm , init_nb_element , *pfrequency;
+  double start_proba , residual , residual_square_sum = -D_INF , previous_residual_square_sum ,
+         correction[3] , *ppoint;
+  Function *function;
+
+
+  // parameter[0], parameter[1] and parameter[2] hold a, b and c of y = a + b * exp(-c * x)
+
+  function = new Function(MONOMOLECULAR , length);
+
+  function->regression_df = function->nb_parameter;
+  function->residual_df = nb_element - function->nb_parameter;
+
+  // parameter initialization
+
+  // frequency-weighted mean of the first points (at least init_nb_element observations)
+
+  init_nb_element = (int)(START_RATIO * nb_element);
+  init_nb_element = MAX(init_nb_element , REGRESSION_NB_ELEMENT / 4);
+
+  pfrequency = frequency;
+  ppoint = point[0];
+  start_proba = 0.;
+  i = 0;
+
+  do {
+    start_proba += *pfrequency * *ppoint++;
+    i += *pfrequency++;
+  }
+  while (i < init_nb_element);
+  start_proba /= i;
+
+  // a: frequency-weighted mean of the last points, walking backward from the end
+
+  init_nb_element = (int)(END_RATIO * nb_element);
+  init_nb_element = MAX(init_nb_element , REGRESSION_NB_ELEMENT / 4);
+
+  pfrequency = frequency + length;
+  ppoint = point[0] + length;
+  function->parameter[0] = 0.;
+  i = 0;
+
+  do {
+    function->parameter[0] += *--pfrequency * *--ppoint;
+    i += *pfrequency;
+  }
+  while (i < init_nb_element);
+  function->parameter[0] /= i;
+
+  // b: difference between the starting level and a
+
+  function->parameter[1] = start_proba - function->parameter[0];
+
+  // c: frequency-weighted mean of -log((y - a) / b) / x over the points where the ratio is positive;
+  // a zero b is skipped explicitly so that the ratio is never computed as a division by zero
+
+  pfrequency = frequency + 1;
+  ppoint = point[0] + 1;
+  function->parameter[2] = 0.;
+  norm = 0;
+  for (i = 1;i < length;i++) {
+    if ((*pfrequency > 0) && (function->parameter[1] != 0.) &&
+        ((*ppoint - function->parameter[0]) / function->parameter[1] > 0.)) {
+      function->parameter[2] -= *pfrequency * log((*ppoint - function->parameter[0]) / function->parameter[1]) / i;
+      norm += *pfrequency;
+    }
+    pfrequency++;
+    ppoint++;
+  }
+
+  // when no point qualifies, norm is 0 and the division would yield 0/0 (NaN):
+  // c is then left at its zero starting value
+
+  if (norm > 0) {
+    function->parameter[2] /= norm;
+  }
+
+# ifdef DEBUG
+  cout << "\n";
+  function->ascii_parameter_print(cout);
+  cout << endl;
+# endif
+
+  // least-square iterations
+
+  iter = 0;
+  do {
+    iter++;
+    previous_residual_square_sum = residual_square_sum;
+
+    pfrequency = frequency;
+    ppoint = point[0];
+    residual_square_sum = 0.;
+
+    for (i = 0;i < function->nb_parameter;i++) {
+      correction[i] = 0.;
+    }
+
+    for (i = 0;i < length;i++) {
+      if (*pfrequency > 0) {
+        residual = *ppoint - (function->parameter[0] + function->parameter[1] *
+                   exp(-function->parameter[2] * i));
+        residual_square_sum += *pfrequency * residual * residual;
+        correction[0] += *pfrequency * residual;
+        correction[1] += *pfrequency * residual * exp(-function->parameter[2] * i);
+        if (i > 0) {
+          correction[2] -= *pfrequency * residual * function->parameter[1] * i *
+                           exp(-function->parameter[2] * i);
+        }
+      }
+      pfrequency++;
+      ppoint++;
+    }
+    residual_square_sum /= nb_element;
+
+    function->parameter[0] += GRADIENT_DESCENT_COEFF * correction[0] / nb_element;
+    function->parameter[1] += GRADIENT_DESCENT_COEFF * correction[1] / nb_element;
+
+    // index 0 does not contribute to correction[2]; the step for c is skipped
+    // when its normalizing count is zero instead of dividing by zero
+
+    if (nb_element != frequency[0]) {
+      function->parameter[2] += GRADIENT_DESCENT_COEFF * correction[2] / (nb_element - frequency[0]);
+    }
+
+    // application of thresholds on parameters
+    // (a and a + b are both kept within [MIN_PROBABILITY , 1 - MIN_PROBABILITY])
+
+    if (function->parameter[0] < MIN_PROBABILITY) {
+      function->parameter[0] = MIN_PROBABILITY;
+    }
+    if (function->parameter[0] > 1. - MIN_PROBABILITY) {
+      function->parameter[0] = 1. - MIN_PROBABILITY;
+    }
+    if (function->parameter[0] + function->parameter[1] < MIN_PROBABILITY) {
+      function->parameter[1] = MIN_PROBABILITY - function->parameter[0];
+    }
+    if (function->parameter[0] + function->parameter[1] > 1. - MIN_PROBABILITY) {
+      function->parameter[1] = 1. - MIN_PROBABILITY - function->parameter[0];
+    }
+
+#   ifdef DEBUG
+    if ((iter < 10) || (iter % 10 == 0)) {
+      function->ascii_parameter_print(cout);
+      cout << "\niteration " << iter << ", " << residual_square_sum << " | "
+           << (previous_residual_square_sum - residual_square_sum) / residual_square_sum << endl;
+    }
+#   endif
+
+  }
+  while (((previous_residual_square_sum - residual_square_sum) / residual_square_sum > RESIDUAL_SQUARE_SUM_DIFF) &&
+         (iter < REGRESSION_NB_ITER));
+
+  // computation of the monomolecular function and the residuals
+
+  function->computation();
+  function->residual_computation(*this);
+
+  return function;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the log-likelihood of a nonhomogeneous Markov chain for sequences.
+ *
+ *  The result is D_INF as soon as the data have more than one variable, the model has
+ *  fewer states than the marginal distribution has values, or a zero-probability
+ *  initial state or transition is met.
+ *
+ *  \param[in] seq   reference on a MarkovianSequences object,
+ *  \param[in] index sequence index (I_DEFAULT: all the sequences).
+ *
+ *  \return          log-likelihood.
+ */
+/*--------------------------------------------------------------*/
+
+double NonhomogeneousMarkov::likelihood_computation(const MarkovianSequences &seq , int index) const
+
+{
+  int i , j , k;
+  int *pstate;
+  double likelihood = 0. , proba;
+  Chain *index_chain;
+
+
+  // checking of the compatibility of the model with the data
+
+  if (seq.nb_variable == 1) {
+    if ((seq.marginal_distribution[0]) &&
+        (nb_state < seq.marginal_distribution[0]->nb_value)) {
+      likelihood = D_INF;
+    }
+  }
+
+  else {
+    likelihood = D_INF;
+  }
+
+  if (likelihood != D_INF) {
+
+    // working copy whose transition probabilities are updated along each sequence
+
+    index_chain = new Chain(*this);
+
+    for (i = 0;i < seq.nb_sequence;i++) {
+      if ((index == I_DEFAULT) || (index == i)) {
+
+        // initialization of the transition probability matrix
+        // (the copy is still untouched for the first sequence)
+
+        if (i > 0) {
+          index_chain->parameter_copy(*this);
+        }
+
+        pstate = seq.int_sequence[i][0];
+
+        proba = initial[*pstate];
+        if (proba > 0.) {
+          likelihood += log(proba);
+        }
+        else {
+          likelihood = D_INF;
+          break;
+        }
+
+        for (j = 1;j < seq.length[i];j++) {
+
+          // change in transition probabilities with the index parameter
+
+          for (k = 0;k < nb_state;k++) {
+            if (!homogeneity[k]) {
+              transition_update(k , j - 1 , *index_chain);
+            }
+          }
+
+          proba = index_chain->transition[*pstate][*(pstate + 1)];
+          pstate++;
+
+          if (proba > 0.) {
+            likelihood += log(proba);
+          }
+          else {
+            likelihood = D_INF;
+            break;
+          }
+        }
+
+        // a failure inside the sequence also ends the loop on sequences
+
+        if (likelihood == D_INF) {
+          break;
+        }
+      }
+    }
+
+    delete index_chain;
+  }
+
+  return likelihood;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the initial state and transition counts.
+ */
+/*--------------------------------------------------------------*/
+
+void NonhomogeneousMarkovData::build_transition_count()
+
+{
+  // both dimensions passed to ChainData are the number of values of the first variable
+
+  chain_data = new ChainData(ORDINARY , marginal_distribution[0]->nb_value ,
+                             marginal_distribution[0]->nb_value);
+  transition_count_computation(*chain_data);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Estimation of a nonhomogeneous Markov chain.
+ *
+ *  \param[in] error         reference on a StatError object,
+ *  \param[in] ident         identifiers of the self-transition probability functions,
+ *  \param[in] counting_flag flag on the computation of the counting distributions.
+ *
+ *  \return                  NonhomogeneousMarkov object.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkov* MarkovianSequences::nonhomogeneous_markov_estimation(StatError &error , parametric_function *ident ,
+                                                                           bool counting_flag) const
+
+{
+  bool status = true;
+  int i;
+  NonhomogeneousMarkov *markov;
+  NonhomogeneousMarkovData *seq;
+
+
+  markov = NULL;
+  error.init();
+
+  // the data must have a single variable taking between 2 and NB_STATE values
+
+  if (nb_variable > 1) {
+    status = false;
+    error.correction_update(STAT_error[STATR_NB_VARIABLE] , 1);
+  }
+  if ((marginal_distribution[0]->nb_value < 2) ||
+      (marginal_distribution[0]->nb_value > NB_STATE)) {
+    status = false;
+    error.update(SEQ_error[SEQR_NB_STATE]);
+  }
+
+  if (status) {
+    markov = new NonhomogeneousMarkov(marginal_distribution[0]->nb_value , ident);
+    markov->markov_data = new NonhomogeneousMarkovData(*this);
+
+    seq = markov->markov_data;
+    seq->state_variable_init();
+    seq->build_transition_count();
+
+    // estimation of the Markov chain parameters
+
+    seq->chain_data->estimation(*markov);
+
+    // estimation of the self-transition probability functions
+
+    seq->self_transition_computation(markov->homogeneity);
+
+    for (i = 0;i < markov->nb_state;i++) {
+      if (!(markov->homogeneity[i])) {
+
+#       ifdef DEBUG
+        cout << *(seq->self_transition[i]);
+#       endif
+
+        // a regression is only fitted with at least REGRESSION_NB_ELEMENT elements;
+        // otherwise the state is switched to homogeneous
+
+        if (seq->self_transition[i]->nb_element_computation() >= REGRESSION_NB_ELEMENT) {
+          switch (ident[i]) {
+          case LOGISTIC :
+            markov->self_transition[i] = seq->self_transition[i]->logistic_regression();
+            break;
+          case MONOMOLECULAR :
+            markov->self_transition[i] = seq->self_transition[i]->monomolecular_regression();
+            break;
+          }
+        }
+
+        else {
+          markov->homogeneity[i] = true;
+        }
+      }
+    }
+
+    // search for a remaining nonhomogeneous state
+
+    for (i = 0;i < markov->nb_state;i++) {
+      if (!(markov->homogeneity[i])) {
+        break;
+      }
+    }
+
+    // every state is homogeneous: the array of function pointers is released
+
+    if (i == markov->nb_state) {
+      delete [] markov->self_transition;
+      markov->self_transition = NULL;
+    }
+
+    // computation of the log-likelihood and the characteristic distributions of the model
+
+    seq->likelihood = markov->likelihood_computation(*seq);
+
+    // a D_INF log-likelihood is reported as an estimation failure and no model is returned
+
+    if (seq->likelihood == D_INF) {
+      delete markov;
+      markov = NULL;
+      error.update(STAT_error[STATR_ESTIMATION_FAILURE]);
+    }
+
+    else {
+      markov->component_computation();
+      markov->characteristic_computation(*seq , counting_flag , false);
+    }
+  }
+
+  return markov;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Simulation using a nonhomogeneous Markov chain.
+ *
+ *  \param[in] error               reference on a StatError object,
+ *  \param[in] length_distribution sequence length frequency distribution,
+ *  \param[in] counting_flag       flag on the computation of the counting distributions.
+ *
+ *  \return                        NonhomogeneousMarkovData object (NULL when a check fails).
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData* NonhomogeneousMarkov::simulation(StatError &error ,
+                                                           const FrequencyDistribution &length_distribution ,
+                                                           bool counting_flag) const
+
+{
+  bool status = true;
+  int i , j , k;
+  int cumul_length , *pstate;
+  Chain *index_chain;
+  NonhomogeneousMarkov *markov;
+  NonhomogeneousMarkovData *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  // checks on the number of sequences and on the shortest and longest lengths
+
+  if ((length_distribution.nb_element < 1) || (length_distribution.nb_element > NB_SEQUENCE)) {
+    status = false;
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+  }
+  if (length_distribution.offset < 2) {
+    status = false;
+    error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]);
+  }
+  if (length_distribution.nb_value - 1 > MAX_LENGTH) {
+    status = false;
+    error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]);
+  }
+
+  // check on the cumulative length (sum of length * frequency), bounded by CUMUL_LENGTH
+
+  if (status) {
+    cumul_length = 0;
+    for (i = length_distribution.offset;i < length_distribution.nb_value;i++) {
+      cumul_length += i * length_distribution.frequency[i];
+    }
+
+    if (cumul_length > CUMUL_LENGTH) {
+      status = false;
+      error.update(SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]);
+    }
+  }
+
+  if (status) {
+
+    // initializations
+
+    seq = new NonhomogeneousMarkovData(length_distribution);
+    seq->type[0] = STATE;
+
+    seq->markov = new NonhomogeneousMarkov(*this , false);
+    markov = seq->markov;
+    markov->create_cumul();
+    markov->cumul_computation();
+
+    // a self-transition function tabulated on fewer points than the longest sequence
+    // requires is reallocated and recomputed up to seq->max_length - 1
+
+    for (i = 0;i < markov->nb_state;i++) {
+      if (!(markov->homogeneity[i])) {
+        if (markov->self_transition[i]->max_value < seq->max_length - 1) {
+          delete [] markov->self_transition[i]->point;
+          markov->self_transition[i]->max_value = seq->max_length - 1;
+          markov->self_transition[i]->point = new double[markov->self_transition[i]->max_value + 1];
+          markov->self_transition[i]->computation();
+        }
+      }
+    }
+
+    index_chain = new Chain(*markov);
+
+    for (i = 0;i < seq->nb_sequence;i++) {
+
+      // initialization of the transition probability matrix
+
+      index_chain->parameter_copy(*this);
+
+      // first state obtained from the cumulative initial distribution
+      // (cumul_method presumably draws a random value - TODO confirm)
+
+      pstate = seq->int_sequence[i][0];
+      *pstate = cumul_method(markov->nb_state , markov->cumul_initial);
+
+      for (j = 1;j < seq->length[i];j++) {
+
+        // change in transition probabilities with the index parameter
+
+        for (k = 0;k < markov->nb_state;k++) {
+          if (!markov->homogeneity[k]) {
+            transition_update(k , j - 1 , *index_chain);
+          }
+        }
+
+        // next state obtained from the cumulative transition distribution of the current state
+
+        *(pstate + 1) = cumul_method(markov->nb_state , index_chain->cumul_transition[*pstate]);
+        pstate++;
+      }
+    }
+
+    markov->remove_cumul();
+    delete index_chain;
+
+    // computation of the characteristics of the generated sequences
+
+    for (i = 0;i < seq->nb_variable;i++) {
+      seq->max_value_computation(i);
+      seq->build_marginal_frequency_distribution(i);
+    }
+
+    seq->self_transition_computation(markov->homogeneity);
+    seq->build_transition_count();
+    seq->build_characteristic();
+
+    // degrees of freedom and residuals of each function with respect to the generated sequences
+
+    for (i = 0;i < markov->nb_state;i++) {
+      if (!(markov->homogeneity[i])) {
+        markov->self_transition[i]->regression_df = markov->self_transition[i]->nb_parameter;
+        markov->self_transition[i]->residual_df = seq->self_transition[i]->nb_element_computation() -
+                                                  markov->self_transition[i]->nb_parameter;
+
+        // the arrays are reallocated to max_value + 1 entries before the residuals are recomputed
+
+        delete [] markov->self_transition[i]->residual;
+        delete [] markov->self_transition[i]->frequency;
+        markov->self_transition[i]->residual = new double[markov->self_transition[i]->max_value + 1];
+        markov->self_transition[i]->frequency = new int[markov->self_transition[i]->max_value + 1];
+
+        markov->self_transition[i]->residual_computation(*(seq->self_transition[i]));
+      }
+    }
+
+    markov->characteristic_computation(*seq , counting_flag);
+
+    // computation of the log-likelihood of the model for the generated sequences
+
+    seq->likelihood = markov->likelihood_computation(*seq);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Simulation using a nonhomogeneous Markov chain.
+ *
+ *  \param[in] error         reference on a StatError object,
+ *  \param[in] nb_sequence   number of sequences,
+ *  \param[in] length        sequence length,
+ *  \param[in] counting_flag flag on the computation of the counting distributions.
+ *
+ *  \return                  NonhomogeneousMarkovData object (NULL when a check fails).
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData* NonhomogeneousMarkov::simulation(StatError &error , int nb_sequence ,
+                                                           int length , bool counting_flag) const
+
+{
+  bool status = true;
+  NonhomogeneousMarkovData *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) {
+    status = false;
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+  }
+  if (length < 2) {
+    status = false;
+    error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]);
+  }
+  if (length > MAX_LENGTH) {
+    status = false;
+    error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]);
+  }
+
+  if (status) {
+
+    // length distribution with all the nb_sequence sequences at the same length,
+    // then delegation to the overload taking a length distribution
+
+    FrequencyDistribution length_distribution(length + 1);
+
+    length_distribution.nb_element = nb_sequence;
+    length_distribution.offset = length;
+    length_distribution.max = nb_sequence;
+    length_distribution.mean = length;
+    length_distribution.variance = 0.;
+    length_distribution.frequency[length] = nb_sequence;
+
+    seq = simulation(error , length_distribution , counting_flag);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Simulation using a
nonhomogeneous Markov chain.
+ *
+ *  The lengths of the simulated sequences follow the length distribution of iseq
+ *  rescaled to nb_sequence sequences.
+ *
+ *  \param[in] error         reference on a StatError object,
+ *  \param[in] nb_sequence   number of sequences,
+ *  \param[in] iseq          reference on a MarkovianSequences object,
+ *  \param[in] counting_flag flag on the computation of the counting distributions.
+ *
+ *  \return                  NonhomogeneousMarkovData object (NULL when nb_sequence is rejected).
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData* NonhomogeneousMarkov::simulation(StatError &error , int nb_sequence ,
+                                                           const MarkovianSequences &iseq ,
+                                                           bool counting_flag) const
+
+{
+  error.init();
+
+  // the number of sequences must lie between 1 and NB_SEQUENCE
+
+  if (!((nb_sequence >= 1) && (nb_sequence <= NB_SEQUENCE))) {
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+    return NULL;
+  }
+
+  // temporary rescaled length distribution, released once the simulation is done
+
+  FrequencyDistribution *scaled_length = iseq.length_distribution->frequency_scale(nb_sequence);
+  NonhomogeneousMarkovData *simulated = simulation(error , *scaled_length , counting_flag);
+
+  delete scaled_length;
+
+  return simulated;
+}
+
+
+};    // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/nonhomogeneous_markov.cpp b/src/cpp/sequence_analysis/nonhomogeneous_markov.cpp
new file mode 100644
index 0000000..2253c09
--- /dev/null
+++ b/src/cpp/sequence_analysis/nonhomogeneous_markov.cpp
@@ -0,0 +1,2697 @@
+/* -*-c++-*-
+ *  ----------------------------------------------------------------------------
+ *
+ *       StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ *       Copyright 1995-2018 CIRAD AGAP
+ *
+ *       File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ *       $Source$
+ *       $Id: nonhomogeneous_markov.cpp 3257 2007-06-06 12:56:12Z dufourko $
+ *
+ *       Forum for StructureAnalysis developers: amldevlp@cirad.fr
+ *
+ *  ----------------------------------------------------------------------------
+ *
+ *                      GNU General Public Licence
+ *
+ *       This program is free software; you can redistribute it and/or
+ *       modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
+ *       the License, or (at your option) any later version.
+ *
+ *       This program is distributed in the hope that it will be useful,
+ *       but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *       MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the
+ *       GNU General Public License for more details.
+ *
+ *       You should have received a copy of the GNU General Public
+ *       License along with this program; see the file COPYING. If not,
+ *       write to the Free Software Foundation, Inc., 59
+ *       Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *  ----------------------------------------------------------------------------
+ */
+
+
+
+// NOTE(review): the header names of the eight angle-bracket includes below are missing
+// from this copy of the patch; this file uses log/exp/sqrt, string/getline,
+// ifstream/ofstream, setw and the boost tokenizer - restore the names from the original file
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "stat_tool/stat_label.h"
+
+#include "nonhomogeneous_markov.h"
+#include "sequence_label.h"
+
+using namespace std;
+using namespace boost;
+using namespace stat_tool;
+
+
+namespace sequence_analysis {
+
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Default constructor of the Function class:
+ *         no residual or frequency array is attached.
+ */
+/*--------------------------------------------------------------*/
+
+Function::Function()
+
+{
+  frequency = NULL;
+  residual = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the Function class from explicit parameter values.
+ *
+ *  \param[in] iident     identifier,
+ *  \param[in] length     length,
+ *  \param[in] iparameter parameters (nb_parameter values are read).
+ */
+/*--------------------------------------------------------------*/
+
+Function::Function(parametric_function iident , int length , double *iparameter)
+:RegressionKernel(iident , 0 , length - 1)
+
+{
+  // take over the caller-supplied parameter values
+
+  for (int index = nb_parameter - 1;index >= 0;index--) {
+    parameter[index] = iparameter[index];
+  }
+
+  // no residuals or frequencies for a function built from explicit parameters
+
+  frequency = NULL;
+  residual = NULL;
+
+  computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the Function class.
+ *
+ *  \param[in] iident identifier,
+ *  \param[in] length length.
+ */
+/*--------------------------------------------------------------*/
+
+Function::Function(parametric_function iident , int length)
+:RegressionKernel(iident , 0 , length - 1)
+
+{
+  const int nb_value = max_value + 1;
+
+
+  residual = new double[nb_value];
+  frequency = new int[nb_value];
+
+  // every index starts with a -D_INF residual and a zero frequency
+
+  for (int index = 0;index < nb_value;index++) {
+    residual[index] = -D_INF;
+    frequency[index] = 0;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of the residuals and frequencies of a Function object.
+ *
+ *  Both arrays are duplicated over max_value + 1 entries when the source holds
+ *  the two of them; otherwise both pointers are set to NULL.
+ *
+ *  \param[in] function reference on a Function object.
+ */
+/*--------------------------------------------------------------*/
+
+void Function::copy(const Function &function)
+
+{
+  if (!(function.residual) || !(function.frequency)) {
+    residual = NULL;
+    frequency = NULL;
+    return;
+  }
+
+  const int nb_value = max_value + 1;
+
+  residual = new double[nb_value];
+  frequency = new int[nb_value];
+
+  for (int index = 0;index < nb_value;index++) {
+    residual[index] = function.residual[index];
+    frequency[index] = function.frequency[index];
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor by copy of the Function class.
+ *
+ *  \param[in] function reference on a Function object.
+ */
+/*--------------------------------------------------------------*/
+
+Function::Function(const Function &function)
+
+{
+  // base-class part first: copy() sizes its arrays from max_value
+
+  RegressionKernel::copy(function);
+  copy(function);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destruction of the data members of a Function object
+ *         (the pointers themselves are not reset).
+ */
+/*--------------------------------------------------------------*/
+
+void Function::remove()
+
+{
+  delete [] residual;
+  delete [] frequency;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the Function class.
+ */
+/*--------------------------------------------------------------*/
+
+Function::~Function()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the Function class.
+ *
+ *  \param[in] function reference on a Function object.
+ *
+ *  \return             Function object.
+ */
+/*--------------------------------------------------------------*/
+
+Function& Function::operator=(const Function &function)
+
+{
+  // self-assignment leaves the object untouched
+
+  if (&function == this) {
+    return *this;
+  }
+
+  // release the current content, derived part first, then rebuild from the source
+
+  remove();
+  RegressionKernel::remove();
+
+  RegressionKernel::copy(function);
+  copy(function);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Analysis of the format of a Function object.
+ *
+ *  \param[in] error   reference on a StatError object,
+ *  \param[in] in_file stream,
+ *  \param[in] line    reference on the file line index,
+ *  \param[in] length  length,
+ *  \param[in] min     lower bound,
+ *  \param[in] max     upper bound.
+ *
+ *  \return            Function object.
+ */
+/*--------------------------------------------------------------*/
+
+Function* Function::parsing(StatError &error , ifstream &in_file , int &line ,
+                            int length , double min , double max)
+
+{
+  string buffer;
+  size_t position;
+  // boost tokenizer split on spaces and tabs (template arguments restored)
+  typedef tokenizer<char_separator<char>> tokenizer;
+  char_separator<char> separator(" \t");
+  bool status = true , lstatus;
+  int i , j;
+  int nb_parameter = 0 , index;
+  parametric_function ident = NONPARAMETRIC_FUNCTION;
+  double parameter[3];
+  Function *function;
+
+
+  function = NULL;
+
+  // only the first line containing at least one token is analyzed
+
+  while (getline(in_file , buffer)) {
+    line++;
+
+#   ifdef DEBUG
+    cout << line << " " << buffer << endl;
+#   endif
+
+    // everything from the first '#' is discarded
+
+    position = buffer.find('#');
+    if (position != string::npos) {
+      buffer.erase(position);
+    }
+    i = 0;
+
+    tokenizer tok_buffer(buffer , separator);
+
+    for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) {
+      if (i <= 1) {
+        switch (i) {
+
+        // test LOGISTIC/MONOMOLECULAR keyword
+
+        case 0 : {
+          for (j = LOGISTIC;j <= MONOMOLECULAR;j++) {
+            if (*token == STAT_function_word[j]) {
+              ident = (parametric_function)j;
+              break;
+            }
+          }
+
+          if (j == MONOMOLECULAR + 1) {
+            status = false;
+            error.update(STAT_parsing[STATP_KEYWORD] , line , i + 1);
+          }
+          else {
+            nb_parameter = 3;
+            parameter[0] = D_DEFAULT;
+          }
+          break;
+        }
+
+        // test FUNCTION keyword
+
+        case 1 : {
+          if (*token != STAT_word[STATW_FUNCTION]) {
+            status = false;
+            error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_FUNCTION] , line , i + 1);
+          }
+          break;
+        }
+        }
+      }
+
+      // after the two keywords, tokens come in groups of four:
+      // PARAMETER keyword, parameter index, separator, parameter value
+
+      else {
+        switch ((i - 2) % 4) {
+
+        // test PARAMETER keyword
+
+        case 0 : {
+          if (*token != STAT_word[STATW_PARAMETER]) {
+            status = false;
+            error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_PARAMETER] , line , i + 1);
+          }
+          break;
+        }
+
+        // test parameter index
+
+        case 1 : {
+          lstatus = true;
+
+/*          try {
+            index = stoi(*token);   in C++ 11
+          }
+          catch(invalid_argument &arg) {
+            lstatus = false;
+          } */
+          index = atoi(token->c_str());
+
+          if ((lstatus) && (index != (i - 2) / 4 + 1)) {
+            lstatus = false;
+          }
+
+          if (!lstatus) {
+            status = false;
+            error.correction_update(STAT_parsing[STATP_PARAMETER_INDEX] , (i - 2) / 4 + 1 , line , i + 1);
+          }
+          break;
+        }
+
+        // test separator
+
+        case 2 : {
+          if (*token != ":") {
+            status = false;
+            error.update(STAT_parsing[STATP_SEPARATOR] , line , i + 1);
+          }
+          break;
+        }
+
+        // test parameter value
+
+        case 3 : {
+          if ((i - 2) / 4 < nb_parameter) {
+            lstatus = true;
+
+/*            try {
+              parameter[(i - 2) / 4] = stod(*token);   in C++ 11
+            }
+            catch(invalid_argument &arg) {
+              lstatus = false;
+            } */
+            parameter[(i - 2) / 4] = atof(token->c_str());
+
+            if (lstatus) {
+              switch (ident) {
+
+              // logistic: parameter 1 and parameter 1 / (1 + parameter 2) within [min , max],
+              // parameter 3 strictly positive
+
+              case LOGISTIC : {
+                switch ((i - 2) / 4) {
+
+                case 0 : {
+                  if ((parameter[0] < min) || (parameter[0] > max)) {
+                    lstatus = false;
+                  }
+                  break;
+                }
+
+                case 1 : {
+                  if ((parameter[0] != D_DEFAULT) &&
+                      ((parameter[0] / (1. + parameter[1]) < min) || (parameter[0] / (1. + parameter[1]) > max))) {
+                    lstatus = false;
+                  }
+                  break;
+                }
+
+                case 2 : {
+                  if (parameter[2] <= 0.) {
+                    lstatus = false;
+                  }
+                  break;
+                }
+                }
+
+                break;
+              }
+
+              // monomolecular: parameter 1 and parameter 1 + parameter 2 within [min , max],
+              // parameter 3 strictly positive
+
+              case MONOMOLECULAR : {
+                switch ((i - 2) / 4) {
+
+                case 0 : {
+                  if ((parameter[0] < min) || (parameter[0] > max)) {
+                    lstatus = false;
+                  }
+                  break;
+                }
+
+                case 1 : {
+                  if ((parameter[0] != D_DEFAULT) &&
+                      ((parameter[0] + parameter[1] < min) || (parameter[0] + parameter[1] > max))) {
+                    lstatus = false;
+                  }
+                  break;
+                }
+
+                case 2 : {
+                  if (parameter[2] <= 0.) {
+                    lstatus = false;
+                  }
+                  break;
+                }
+                }
+
+                break;
+              }
+              }
+            }
+
+            if (!lstatus) {
+              status = false;
+              error.update(STAT_parsing[STATP_PARAMETER_VALUE] , line , i + 1);
+            }
+          }
+          break;
+        }
+        }
+      }
+
+      i++;
+    }
+
+    // a complete line has 14 tokens: 2 keywords and 3 groups of 4
+
+    if (i > 0) {
+      if (i != 14) {
+        status = false;
+        error.update(STAT_parsing[STATP_FORMAT] , line);
+      }
+
+      break;
+    }
+  }
+
+  if (ident == NONPARAMETRIC_FUNCTION) {
+    status = false;
+    error.update(STAT_parsing[STATP_FORMAT] , line);
+  }
+
+  if (status) {
+    function = new Function(ident , length , parameter);
+  }
+
+  return function;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a Function object and the associated Curves object.
+ *
+ *  \param[in,out] os         stream,
+ *  \param[in]     exhaustive flag detail level,
+ *  \param[in]     file_flag  flag file (output lines are prefixed with "# "),
+ *  \param[in]     curves     pointer on a Curves object (may be NULL).
+ *
+ *  \return                   stream.
+ */
+/*--------------------------------------------------------------*/
+
+ostream& Function::ascii_print(ostream &os , bool exhaustive , bool file_flag ,
+                               const Curves *curves) const
+
+{
+  int i;
+  int width[6];
+  double self_transition_mean , residual_mean , residual_standard_deviation ,
+         *standard_residual , square_sum[3];
+  ios_base::fmtflags format_flags;
+
+
+  format_flags = os.setf(ios::right , ios::adjustfield);
+
+  ascii_parameter_print(os);
+  os << endl;
+
+  if (curves) {
+    self_transition_mean = curves->mean_computation(0);
+    square_sum[0] = regression_square_sum_computation(self_transition_mean);
+    square_sum[1] = residual_square_sum_computation();
+    square_sum[2] = curves->total_square_sum_computation(0 , self_transition_mean);
+
+    // determination coefficient: 1 - residual square sum / total square sum
+
+    os << "\n";
+    if (file_flag) {
+      os << "# ";
+    }
+    os << STAT_label[STATL_DETERMINATION_COEFF] << ": "
+       << 1. - square_sum[1] / square_sum[2] << endl;
+
+    if (file_flag) {
+      os << "# ";
+    }
+    os << regression_df << " " << STAT_label[STATL_REGRESSION] << " " << STAT_label[STATL_FREEDOM_DEGREES] << " "
+       << residual_df << " " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_FREEDOM_DEGREES] << endl;
+
+#   ifdef DEBUG
+    os << "\n" << STAT_label[STATL_REGRESSION] << " " << STAT_label[STATL_SQUARE_SUM] << ": " << square_sum[0]
+       << " " << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_SQUARE_SUM] << ": " << square_sum[1]
+       << " " << STAT_label[STATL_TOTAL] << " " << STAT_label[STATL_SQUARE_SUM] << ": " << square_sum[2] << endl;
+#   endif
+
+    // writing of the residual mean and standard deviation
+
+    residual_mean = residual_mean_computation();
+    residual_standard_deviation = sqrt(residual_variance_computation(residual_mean));
+
+    os << "\n";
+    if (file_flag) {
+      os << "# ";
+    }
+    os << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_MEAN] << ": " << residual_mean << " "
+       << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << ": "
+       << residual_standard_deviation << endl;
+  }
+
+  if (exhaustive) {
+    if (curves) {
+
+      // computation of the standardized residuals
+
+      standard_residual = new double[max_value + 1];
+
+      for (i = 0;i <= max_value;i++) {
+        if (frequency[i] > 0) {
+          standard_residual[i] = residual[i] / residual_standard_deviation;
+        }
+
+        // entries without observation are never printed, but the whole array is handed
+        // to column_width below: they are given a defined value instead of being left uninitialized
+
+        else {
+          standard_residual[i] = 0.;
+        }
+      }
+    }
+
+    // computation of the column widths
+
+    width[0] = column_width(max_value);
+    width[2] = column_width(max_value + 1 , point) + ASCII_SPACE;
+    if (curves) {
+      width[1] = column_width(curves->length , curves->point[0]) + ASCII_SPACE;
+      width[3] = column_width(max_value + 1 , residual) + ASCII_SPACE;
+      width[4] = column_width(max_value + 1 , standard_residual) + ASCII_SPACE;
+      width[5] = column_width(curves->max_frequency_computation()) + ASCII_SPACE;
+    }
+
+    // writing of the observed and theoretical self-transition probabilities, of the residuals,
+    // the standardized residuals and the frequencies
+
+    os << "\n";
+    if (file_flag) {
+      os << "# ";
+    }
+    os << " ";
+    if (curves) {
+      os << " | " << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION];
+    }
+    os << " | " << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_SELF_TRANSITION];
+    if (curves) {
+      os << " | " << STAT_label[STATL_RESIDUAL] << " | " << STAT_label[STATL_STANDARDIZED_RESIDUAL]
+         << " | " << STAT_label[STATL_FREQUENCY];
+    }
+    os << endl;
+
+    for (i = 0;i <= max_value;i++) {
+      if (file_flag) {
+        os << "# ";
+      }
+      os << setw(width[0]) << i;
+
+      // observed values and residuals are left blank where there is no observation
+
+      if (curves) {
+        if (frequency[i] > 0) {
+          os << setw(width[1]) << curves->point[0][i];
+        }
+        else {
+          os << setw(width[1]) << " ";
+        }
+      }
+
+      os << setw(width[2]) << point[i];
+
+      if (curves) {
+        if (frequency[i] > 0) {
+          os << setw(width[3]) << residual[i];
+          os << setw(width[4]) << standard_residual[i];
+        }
+        else {
+          os << setw(width[3]) << " ";
+          os << setw(width[4]) << " ";
+        }
+
+        os << setw(width[5]) << frequency[i];
+      }
+
+      os << endl;
+    }
+
+    if (curves) {
+      delete [] standard_residual;
+    }
+  }
+
+  // restore the adjustment flags saved on entry
+
+  os.setf(format_flags , ios::adjustfield);
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a Function object and the associated Curves object
+ *         at the spreadsheet format.
+ *
+ *  \param[in,out] os     stream,
+ *  \param[in]     curves pointer on a Curves object.
+ */
+/*--------------------------------------------------------------*/
+
+ostream& Function::spreadsheet_print(ostream &os , const Curves *curves) const
+
+{
+  double residual_standard_deviation;
+
+
+  // first line: function keyword followed by the numbered parameters
+
+  os << STAT_function_word[ident] << " " << STAT_word[STATW_FUNCTION];
+  for (int rank = 1;rank <= nb_parameter;rank++) {
+    os << "\t\t" << STAT_word[STATW_PARAMETER] << " " << rank << "\t" << parameter[rank - 1];
+  }
+  os << endl;
+
+  if (curves) {
+    const double self_transition_mean = curves->mean_computation(0);
+    const double regression_square_sum = regression_square_sum_computation(self_transition_mean);
+    const double residual_square_sum = residual_square_sum_computation();
+    const double total_square_sum = curves->total_square_sum_computation(0 , self_transition_mean);
+
+    // determination coefficient: 1 - residual square sum / total square sum
+
+    os << "\n" << STAT_label[STATL_DETERMINATION_COEFF] << "\t"
+       << 1. - residual_square_sum / total_square_sum << endl;
+
+    os << regression_df << "\t" << STAT_label[STATL_REGRESSION] << " " << STAT_label[STATL_FREEDOM_DEGREES];
+    os << "\t\t" << residual_df << "\t" << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_FREEDOM_DEGREES] << endl;
+
+#   ifdef DEBUG
+    os << "\n" << STAT_label[STATL_REGRESSION] << " " << STAT_label[STATL_SQUARE_SUM] << "\t" << regression_square_sum
+       << "\t\t" << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_SQUARE_SUM] << "\t" << residual_square_sum
+       << "\t\t" << STAT_label[STATL_TOTAL] << " " << STAT_label[STATL_SQUARE_SUM] << "\t" << total_square_sum << endl;
+#   endif
+
+    // writing of the residual mean and standard deviation
+
+    const double residual_mean = residual_mean_computation();
+    residual_standard_deviation = sqrt(residual_variance_computation(residual_mean));
+
+    os << "\n" << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_MEAN] << "\t" << residual_mean;
+    os << "\t\t" << STAT_label[STATL_RESIDUAL] << " " << STAT_label[STATL_STANDARD_DEVIATION] << "\t"
+       << residual_standard_deviation << endl;
+  }
+
+  // column titles: observed and theoretical self-transition probabilities, residuals,
+  // standardized residuals, the two asymptotes and the frequencies
+
+  os << "\n";
+  if (curves) {
+    os << "\t" << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION];
+  }
+  os << "\t" << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_SELF_TRANSITION];
+  if (curves) {
+    os << "\t" << STAT_label[STATL_RESIDUAL];
+    os << "\t" << STAT_label[STATL_STANDARDIZED_RESIDUAL];
+    os << "\t" << SEQ_label[SEQL_ASYMPTOTE] << "\t" << SEQ_label[SEQL_ASYMPTOTE];
+    os << "\t" << STAT_label[STATL_FREQUENCY];
+  }
+  os << endl;
+
+  // one row per index; without curves only the theoretical value is written
+
+  for (int index = 0;index <= max_value;index++) {
+    os << index;
+
+    if (!curves) {
+      os << "\t" << point[index] << endl;
+      continue;
+    }
+
+    const bool observed = (frequency[index] > 0);
+
+    os << "\t";
+    if (observed) {
+      os << curves->point[0][index];
+    }
+
+    os << "\t" << point[index];
+
+    // residual cells stay empty where there is no observation
+
+    if (observed) {
+      os << "\t" << residual[index] << "\t" << residual[index] / residual_standard_deviation;
+    }
+    else {
+      os << "\t\t";
+    }
+
+    os << "\t" << (1. - point[index]) / residual_standard_deviation;
+    os << "\t" << -point[index] / residual_standard_deviation << "\t" << frequency[index];
+
+    os << endl;
+  }
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of the self-transition probabilities and the bounds on
+ *         the standardized residuals at the Gnuplot format.
+ *
+ *  \param[in] path                        file path,
+ *  \param[in] residual_standard_deviation residual standard deviation
+ *                                         (D_DEFAULT: the bounds are not written).
+ *
+ *  \return                                true when the file could be opened, false otherwise.
+ */
+/*--------------------------------------------------------------*/
+
+bool Function::plot_print(const char *path , double residual_standard_deviation) const
+
+{
+  ofstream out_file(path);
+
+
+  if (!out_file) {
+    return false;
+  }
+
+  const bool bound_flag = (residual_standard_deviation != D_DEFAULT);
+
+  for (int index = 0;index <= max_value - min_value;index++) {
+    out_file << point[index];
+    if (bound_flag) {
+      out_file << " " << (1. - point[index]) / residual_standard_deviation;
+      out_file << " " << -point[index] / residual_standard_deviation;
+    }
+    out_file << endl;
+  }
+
+  return true;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Default constructor of the NonhomogeneousMarkov class:
+ *         every pointer member is set to NULL.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkov::NonhomogeneousMarkov()
+
+{
+  process = NULL;
+
+  self_transition = NULL;
+  homogeneity = NULL;
+
+  markov_data = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the NonhomogeneousMarkov class.
+ *
+ *  \param[in] inb_state number of states,
+ *  \param[in] ident     identifiers of the self-transition probability functions.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkov::NonhomogeneousMarkov(int inb_state , parametric_function *ident)
+:Chain(ORDINARY , inb_state)
+
+{
+  markov_data = NULL;
+
+  homogeneity = new bool[nb_state];
+  self_transition = new Function*[nb_state];
+
+  // a state is nonhomogeneous when its identifier is LOGISTIC or MONOMOLECULAR;
+  // no function is attached yet
+
+  for (int state = 0;state < nb_state;state++) {
+    homogeneity[state] = !((ident[state] == LOGISTIC) || (ident[state] == MONOMOLECULAR));
+    self_transition[state] = NULL;
+  }
+
+  process = new CategoricalSequenceProcess(nb_state , nb_state);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the NonhomogeneousMarkov class.
+ *
+ *  \param[in] pchain           pointer on a Chain object,
+ *  \param[in] pself_transition pointer on Function objects,
+ *  \param[in] length           sequence length.
+ */ +/*--------------------------------------------------------------*/ + +NonhomogeneousMarkov::NonhomogeneousMarkov(const Chain *pchain , const Function **pself_transition , + int length) +:Chain(*pchain) + +{ + int i; + + + markov_data = NULL; + + homogeneity = new bool[nb_state]; + self_transition = new Function*[nb_state]; + + for (i = 0;i < nb_state;i++) { + if (pself_transition[i]) { + homogeneity[i] = false; + self_transition[i] = new Function(*pself_transition[i]); + } + + else { + homogeneity[i] = true; + self_transition[i] = NULL; + } + } + + process = new CategoricalSequenceProcess(nb_state , nb_state); + + characteristic_computation(length , true); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a NonhomogeneousMarkov object. + * + * \param[in] markov reference on a NonhomogeneousMarkov object, + * \param[in] data_flag flag copy of the included NonhomogeneousMarkovData object, + * \param[in] characteristic_flag flag copy of the characteristic distributions. + */ +/*--------------------------------------------------------------*/ + +void NonhomogeneousMarkov::copy(const NonhomogeneousMarkov &markov , bool data_flag , + bool characteristic_flag) + +{ + int i; + + + if ((data_flag) && (markov.markov_data)) { + markov_data = new NonhomogeneousMarkovData(*(markov.markov_data) , false); + } + else { + markov_data = NULL; + } + + homogeneity = new bool[nb_state]; + self_transition = new Function*[nb_state]; + + for (i = 0;i < nb_state;i++) { + homogeneity[i] = markov.homogeneity[i]; + if (homogeneity[i]) { + self_transition[i] = NULL; + } + else { + self_transition[i] = new Function(*(markov.self_transition[i])); + } + } + + process = new CategoricalSequenceProcess(*(markov.process) , CATEGORICAL_SEQUENCE_PROCESS_COPY , + characteristic_flag); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of the data members of a NonhomogeneousMarkov object. 
+ */ +/*--------------------------------------------------------------*/ + +void NonhomogeneousMarkov::remove() + +{ + int i; + + + delete markov_data; + + if (self_transition) { + for (i = 0;i < nb_state;i++) { + if (!homogeneity[i]) { + delete self_transition[i]; + } + } + delete [] self_transition; + } + + delete [] homogeneity; + + delete process; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the NonhomogeneousMarkov class. + */ +/*--------------------------------------------------------------*/ + +NonhomogeneousMarkov::~NonhomogeneousMarkov() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the NonhomogeneousMarkov class. + * + * \param[in] markov reference on a NonhomogeneousMarkov object. + * + * \return NonhomogeneousMarkov object. + */ +/*--------------------------------------------------------------*/ + +NonhomogeneousMarkov& NonhomogeneousMarkov::operator=(const NonhomogeneousMarkov &markov) + +{ + if (&markov != this) { + remove(); + Chain::remove(); + + Chain::copy(markov); + copy(markov); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a distribution. + * + * \param[in] error reference on a StatError object, + * \param[in] dist_type distribution type, + * \param[in] state state. + * + * \return DiscreteParametricModel object. 
+ */ +/*--------------------------------------------------------------*/ + +DiscreteParametricModel* NonhomogeneousMarkov::extract(StatError &error , process_distribution dist_type , + int state) const + +{ + bool status = true; + Distribution *pdist; + DiscreteParametric *pparam; + DiscreteParametricModel *dist; + FrequencyDistribution *phisto; + + + dist = NULL; + error.init(); + + pdist = NULL; + pparam = NULL; + + if ((state < 0) || (state >= process->nb_value)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << state << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + switch (dist_type) { + case FIRST_OCCURRENCE : + pdist = process->first_occurrence[state]; + break; + case RECURRENCE_TIME : + if (process->recurrence_time) { + pdist = process->recurrence_time[state]; + } + break; + case SOJOURN_TIME : + if (process->sojourn_time) { + pparam = process->sojourn_time[state]; + } + break; + case NB_RUN : + pdist = process->nb_run[state]; + break; + case NB_OCCURRENCE : + pdist = process->nb_occurrence[state]; + break; + } + + if ((!pdist) && (!pparam)) { + status = false; + error.update(SEQ_error[SEQR_NON_EXISTING_CHARACTERISTIC_DISTRIBUTION]); + } + } + + if (status) { + phisto = NULL; + + if (markov_data) { + switch (dist_type) { + + case FIRST_OCCURRENCE : { + phisto = markov_data->characteristics[0]->first_occurrence[state]; + break; + } + + case RECURRENCE_TIME : { + if (markov_data->characteristics[0]->recurrence_time[state]->nb_element > 0) { + phisto = markov_data->characteristics[0]->recurrence_time[state]; + } + break; + } + + case SOJOURN_TIME : { + if (markov_data->characteristics[0]->sojourn_time[state]->nb_element > 0) { + phisto = markov_data->characteristics[0]->sojourn_time[state]; + } + break; + } + + case NB_RUN : { + phisto = markov_data->characteristics[0]->nb_run[state]; + break; + } + + case NB_OCCURRENCE : { + phisto = 
markov_data->characteristics[0]->nb_occurrence[state]; + break; + } + } + } + + if (pdist) { + dist = new DiscreteParametricModel(*pdist , phisto); + } + else if (pparam) { + dist = new DiscreteParametricModel(*pparam , phisto); + } + } + + return dist; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a NonhomogeneousMarkov object from a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] length sequence length. + * + * \return NonhomogeneousMarkov object. + */ +/*--------------------------------------------------------------*/ + +NonhomogeneousMarkov* NonhomogeneousMarkov::ascii_read(StatError &error , const string path , int length) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + bool status , lstatus; + int i , j; + int line , homogeneity , nb_state , index; + const Chain *chain; + const Function **self_transition; + NonhomogeneousMarkov *markov; + ifstream in_file(path.c_str()); + + + markov = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test NONHOMOGENEOUS_MARKOV_CHAIN keyword + + if (i == 0) { + if (*token != SEQ_word[SEQW_NONHOMOGENEOUS_MARKOV_CHAIN]) { + status = false; + error.update(STAT_parsing[STATP_KEYWORD] , line); + } + } + + 
i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + // analysis of the format and reading of the Markov chain + + chain = Chain::parsing(error , in_file , line , ORDINARY); + + if (chain) { + nb_state = chain->nb_state; + self_transition = new const Function*[nb_state]; + for (i = 0;i < nb_state;i++) { + self_transition[i] = NULL; + } + + // analysis of the format of the self-transition probability functions + + for (i = 0;i < nb_state;i++) { + homogeneity = I_DEFAULT; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + j = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (j) { + + // test STATE keyword + + case 0 : { + if (*token != STAT_word[STATW_STATE]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_STATE] , line , j + 1); + } + break; + } + + // test state index + + case 1 : { + lstatus = true; + +/* try { + index = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + index = atoi(token->c_str()); + + if ((lstatus) && (index != i)) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.correction_update(STAT_parsing[STATP_STATE_INDEX] , i , line , j + 1); + } + break; + } + + // test HOMOGENEOUS/NONHOMOGENEOUS keyword + + case 2 : { + if (*token == SEQ_word[SEQW_HOMOGENEOUS]) { + homogeneity = true; + } + else { + if (*token == SEQ_word[SEQW_NONHOMOGENEOUS]) { + homogeneity = false; + } + else { + status = false; + error.update(STAT_parsing[STATP_KEYWORD] , line , j + 1); + } + } + break; + } + } + + j++; + } + + if (j > 0) { + if (j != 3) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + if 
(!homogeneity) { + self_transition[i] = Function::parsing(error , in_file , line , length); + if (!self_transition[i]) { + status = false; + } + } + + break; + } + } + + if (homogeneity == I_DEFAULT) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + if (!(trim_right_copy_if(buffer , is_any_of(" \t")).empty())) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + + if (status) { + markov = new NonhomogeneousMarkov(chain , self_transition , length); + } + + delete chain; + + for (i = 0;i < nb_state;i++) { + delete self_transition[i]; + } + delete [] self_transition; + } + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a NonhomogeneousMarkov object. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& NonhomogeneousMarkov::line_write(ostream &os) const + +{ + os << nb_state << " " << STAT_word[STATW_STATES]; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a NonhomogeneousMarkov object and the associated data structure. + * + * \param[in,out] os stream, + * \param[in] seq pointer on a NonhomogeneousMarkovData object, + * \param[in] exhaustive flag detail level, + * \param[in] file_flag flag file. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& NonhomogeneousMarkov::ascii_write(ostream &os , const NonhomogeneousMarkovData *seq , + bool exhaustive , bool file_flag) const + +{ + int i; + + + os << SEQ_word[SEQW_NONHOMOGENEOUS_MARKOV_CHAIN] << endl; + + // writing of the Markov chain parameters + + ascii_print(os , file_flag); + + // writing of the self-transition probability function parameters + + for (i = 0;i < nb_state;i++) { + os << "\n" << STAT_word[STATW_STATE] << " " << i << " "; + + if (homogeneity[i]) { + os << SEQ_word[SEQW_HOMOGENEOUS] << endl; + } + else { + os << SEQ_word[SEQW_NONHOMOGENEOUS] << endl; + self_transition[i]->ascii_print(os , exhaustive , file_flag , + (seq ? seq->self_transition[i] : NULL)); + } + } + + process->ascii_print(os , 0 , NULL , NULL , (seq ? seq->characteristics[0] : NULL) , + exhaustive , file_flag); + + if (seq) { + int nb_parameter = nb_parameter_computation(); + double information , likelihood; + + + // writing of the sequence length frequency distribution + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + seq->length_distribution->ascii_characteristic_print(os , false , file_flag); + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + seq->length_distribution->ascii_print(os , file_flag); + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_CUMUL_LENGTH] << ": " << seq->cumul_length << endl; + + // writing of the information quantity of the sequences in the i.i.d. 
case + + information = seq->iid_information_computation(); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_IID_INFORMATION] << ": " << information << " (" + << information / seq->cumul_length << ")" << endl; + + // writing of the (penalized) log-likelihoods of the model for the sequences + + likelihood = seq->likelihood; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << likelihood / seq->cumul_length << ")" << endl; + + if ((likelihood != D_INF) && (nb_component == 1)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << "): " + << 2 * (likelihood - nb_parameter) << endl; + + if (nb_parameter < seq->cumul_length - 1) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << "): " + << 2 * (likelihood - (double)(nb_parameter * seq->cumul_length) / + (double)(seq->cumul_length - nb_parameter - 1)) << endl; + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << "): " + << 2 * likelihood - nb_parameter * log((double)seq->cumul_length) << endl; + } + } + +# ifdef DEBUG + MultiPlotSet *plot_set; + + plot_set = get_plotable(seq); + delete plot_set; +# endif + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a NonhomogeneousMarkov object. 
+ * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& NonhomogeneousMarkov::ascii_write(ostream &os , bool exhaustive) const + +{ + return ascii_write(os , markov_data , exhaustive , false); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a NonhomogeneousMarkov object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool NonhomogeneousMarkov::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + ascii_write(out_file , markov_data , exhaustive , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a NonhomogeneousMarkov object and the associated data structure + * in a file at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] seq pointer on a NonhomogeneousMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& NonhomogeneousMarkov::spreadsheet_write(ostream &os , const NonhomogeneousMarkovData *seq) const + +{ + int i; + + + os << SEQ_word[SEQW_NONHOMOGENEOUS_MARKOV_CHAIN] << endl; + + // writing of the Markov chain parameters + + spreadsheet_print(os); + + // writing of the self-transition probability function parameters + + for (i = 0;i < nb_state;i++) { + os << "\n" << STAT_word[STATW_STATE] << "\t" << i << "\t"; + + if (homogeneity[i]) { + os << SEQ_word[SEQW_HOMOGENEOUS] << endl; + } + else { + os << SEQ_word[SEQW_NONHOMOGENEOUS] << endl; + self_transition[i]->spreadsheet_print(os , (seq ? seq->self_transition[i] : NULL)); + } + } + + process->spreadsheet_print(os , 0 , NULL , NULL , (seq ? seq->characteristics[0] : NULL)); + + if (seq) { + int nb_parameter = nb_parameter_computation(); + double information , likelihood; + + + // writing of the sequence length frequency distribution + + os << "\n" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + seq->length_distribution->spreadsheet_characteristic_print(os); + + os << "\n\t" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + seq->length_distribution->spreadsheet_print(os); + + os << "\n" << SEQ_label[SEQL_CUMUL_LENGTH] << "\t" << seq->cumul_length << endl; + + // writing of the information quantity of the sequences in the i.i.d. 
case + + information = seq->iid_information_computation(); + + os << "\n" << SEQ_label[SEQL_IID_INFORMATION] << "\t" << information << "\t" + << information / seq->cumul_length << endl; + + // writing of the (penalized) log-likelihoods of the model for the sequences + + likelihood = seq->likelihood; + + os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << likelihood / seq->cumul_length << endl; + + if ((likelihood != D_INF) && (nb_component == 1)) { + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << ")\t" + << 2 * (likelihood - nb_parameter) << endl; + + if (nb_parameter < seq->cumul_length - 1) { + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << ")\t" + << 2 * (likelihood - (double)(nb_parameter * seq->cumul_length) / + (double)(seq->cumul_length - nb_parameter - 1)) << endl; + } + + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << ")\t" + << 2 * likelihood - nb_parameter * log((double)seq->cumul_length) << endl; + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a NonhomogeneousMarkov object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool NonhomogeneousMarkov::spreadsheet_write(StatError &error , const string path) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + spreadsheet_write(out_file , markov_data); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a NonhomogeneousMarkov object and the associated data structure + * using Gnuplot. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] seq pointer on a NonhomogeneousMarkovData object. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool NonhomogeneousMarkov::plot_write(const char *prefix , const char *title , + const NonhomogeneousMarkovData *seq) const + +{ + bool status; + int i , j; + int variable , start , *pfrequency , max_frequency[NB_STATE]; + double residual_mean , residual_standard_deviation , *standard_residual , *presidual , + min_standard_residual[NB_STATE] , max_standard_residual[NB_STATE]; + ostringstream data_file_name[NB_STATE * 2]; + + + if (seq) { + status = process->plot_print(prefix , title , 0 , NULL , NULL , + seq->characteristics[0] , seq->length_distribution); + } + else { + status = process->plot_print(prefix , title , 0); + } + + if (status) { + + // writing of the data files + + for (i = 0;i < nb_state;i++) { + if (!homogeneity[i]) { + if (seq) { + max_frequency[i] = seq->self_transition[i]->max_frequency_computation(); + + // computation of the standardized residuals + + residual_mean = self_transition[i]->residual_mean_computation(); + residual_standard_deviation = sqrt(self_transition[i]->residual_variance_computation(residual_mean)); + + standard_residual = new double[self_transition[i]->max_value + 1]; + + pfrequency = 
self_transition[i]->frequency; + presidual = self_transition[i]->residual; + min_standard_residual[i] = 0.; + max_standard_residual[i] = 0.; + + for (j = 0;j <= self_transition[i]->max_value;j++) { + if (*pfrequency++ > 0) { + standard_residual[j] = *presidual / residual_standard_deviation; + if (standard_residual[j] < min_standard_residual[i]) { + min_standard_residual[i] = standard_residual[j]; + } + if (standard_residual[j] > max_standard_residual[i]) { + max_standard_residual[i] = standard_residual[j]; + } + presidual++; + } + } + } + + data_file_name[i * 2] << prefix << i * 2 << ".dat"; + self_transition[i]->plot_print((data_file_name[i * 2].str()).c_str() , + (seq ? residual_standard_deviation : D_DEFAULT)); + + if (seq) { + data_file_name[i * 2 + 1] << prefix << i * 2 + 1 << ".dat"; + seq->self_transition[i]->plot_print_standard_residual((data_file_name[i * 2 + 1].str()).c_str() , + standard_residual); + delete [] standard_residual; + } + } + } + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << 0 << 0 << ".plot"; + break; + case 1 : + file_name[0] << prefix << 0 << 0 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << 0 << 0 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + start = true; + for (j = 0;j < nb_state;j++) { + if (!homogeneity[j]) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (self_transition[j]->max_value < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << 
"plot [0:" << self_transition[j]->max_value << "] [0:1] "; + if (seq) { + out_file << "\""<< label((data_file_name[j * 2 + 1].str()).c_str()) + << "\" using 1:2 title \"" << STAT_label[STATL_STATE] << " " << j << " - " + << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION] + << "\" with points,\\" << endl; + } + out_file << "\"" << label((data_file_name[j * 2].str()).c_str()) + << "\" using 1 title \"" << STAT_label[STATL_STATE] << " " << j << " - " + << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_SELF_TRANSITION] + << "\" with linespoints" << endl; + + if (self_transition[j]->max_value < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (seq) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set xlabel \"" << SEQ_label[SEQL_INDEX] << "\"" << endl; + out_file << "set ylabel \"" << STAT_label[STATL_STANDARDIZED_RESIDUAL] << "\"" << endl; + + if (seq->self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << seq->self_transition[j]->length - 1 << "] [" + << min_standard_residual[j] << ":" << max_standard_residual[j] << "] \"" + << label((data_file_name[j * 2 + 1].str()).c_str()) + << "\" using 1:3 notitle with points"; + if (((1. - self_transition[j]->point[0]) / residual_standard_deviation <= max_standard_residual[j]) || + ((1. 
- self_transition[j]->point[seq->self_transition[j]->length - 1]) / residual_standard_deviation <= + max_standard_residual[j])) { + out_file << ",\\\n\"" << label((data_file_name[j * 2].str()).c_str()) + << "\" using 2 title \"" << SEQ_label[SEQL_ASYMPTOTE] << "\" with lines"; + } + if ((-self_transition[j]->point[0] / residual_standard_deviation >= min_standard_residual[j]) || + (-self_transition[j]->point[seq->self_transition[j]->length - 1] / residual_standard_deviation >= + min_standard_residual[j])) { + out_file << ",\\\n\"" << label((data_file_name[j * 2].str()).c_str()) + << "\" using 3 title \"" << SEQ_label[SEQL_ASYMPTOTE] << "\" with lines"; + } + out_file << endl; + + out_file << "set xlabel" << endl; + out_file << "set ylabel" << endl; + + if (seq->self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + out_file << "set xlabel \"" << SEQ_label[SEQL_INDEX] << "\"" << endl; + out_file << "set ylabel \"" << STAT_label[STATL_FREQUENCY] << "\"" << endl; + + if (seq->self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(max_frequency[j] * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << seq->self_transition[j]->length - 1 + << "] [0:" << (int)(max_frequency[j] * YSCALE) + 1 << "] \"" + << label((data_file_name[j * 2 + 1].str()).c_str()) + << "\" using 1:4 title \"" << STAT_label[STATL_STATE] << " " + << j << " - "<< SEQ_label[SEQL_TRANSITION_COUNTS] + << "\" with impulses" << endl; + + out_file << "set xlabel" << endl; + out_file << "set ylabel" << endl; + + if (seq->self_transition[j]->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if (max_frequency[j] < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + } + } + + if (i == 1) { + out_file 
<< "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a NonhomogeneousMarkov object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool NonhomogeneousMarkov::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status = plot_write(prefix , title , markov_data); + + error.init(); + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a NonhomogeneousMarkov object and the associated data structure. + * + * \param[in] seq pointer on a NonhomogeneousMarkovData object . + * + * \return MultiPlotSet object. 
+ */
+/*--------------------------------------------------------------*/
+
+// Builds the MultiPlotSet of the model. For each nonhomogeneous state it
+// fills the self-transition probability function plot and, when seq is
+// given, two further plots (standardized residuals, transition counts);
+// the remaining plots are filled by process->plotable_write().
+// The result is allocated with new and returned as a raw pointer
+// (presumably owned by the caller - TODO confirm).
+
+MultiPlotSet* NonhomogeneousMarkov::get_plotable(const NonhomogeneousMarkovData *seq) const
+
+{
+  int i , j , k;
+  int nb_plot_set , index_length , index , nb_plot , max_frequency , *pfrequency;
+  double residual_mean , residual_standard_deviation , min_standard_residual ,
+         max_standard_residual , *standard_residual , *presidual;
+  ostringstream legend;
+  FrequencyDistribution *length_distribution;
+  SequenceCharacteristics *characteristics;
+  MultiPlotSet *plot_set;
+
+
+  // empirical characteristics are only available when data are supplied
+
+  if (seq) {
+    characteristics = seq->characteristics[0];
+    length_distribution = seq->length_distribution;
+  }
+  else {
+    characteristics = NULL;
+    length_distribution = NULL;
+  }
+
+  // computation of the number of plots
+  // NOTE(review): this count has to agree with the plots filled below and,
+  // presumably, with those written by process->plotable_write() - verify
+  // against that function when changing either side
+
+  nb_plot_set = 0;
+
+  if ((process->index_value) || (characteristics)) {
+    nb_plot_set++;
+
+    if (characteristics) {
+      index_length = characteristics->index_value->plot_length_computation();
+
+      if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) {
+        nb_plot_set++;
+      }
+      nb_plot_set++;
+    }
+  }
+
+  if ((process->first_occurrence) || (characteristics)) {
+    for (i = 0;i < nb_state;i++) {
+      if ((process->first_occurrence) && (process->first_occurrence[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->first_occurrence[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+  }
+
+  if ((process->recurrence_time) || (characteristics)) {
+    for (i = 0;i < nb_state;i++) {
+      if ((process->recurrence_time) && (process->recurrence_time[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->recurrence_time[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+  }
+
+  if ((process->sojourn_time) || (characteristics)) {
+    for (i = 0;i < nb_state;i++) {
+      if ((process->sojourn_time) && (process->sojourn_time[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->sojourn_time[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+
+      if ((characteristics) && (i < characteristics->nb_value) &&
+          (characteristics->initial_run) &&
+          (characteristics->initial_run[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+
+      // NOTE(review): final_run is dereferenced without the NULL test applied
+      // to initial_run just above - confirm it is always allocated here
+
+      if ((characteristics) && (i < characteristics->nb_value) &&
+          (characteristics->final_run[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+  }
+
+  if ((process->nb_run) || (process->nb_occurrence) ||
+      ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) {
+    for (i = 0;i < nb_state;i++) {
+      if (process->nb_run) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+
+      if (process->nb_occurrence) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->nb_occurrence) &&
+               (characteristics->nb_occurrence[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+
+    if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) {
+      nb_plot_set++;
+    }
+  }
+
+  // one plot per nonhomogeneous state, three when data are supplied
+
+  for (i = 0;i < nb_state;i++) {
+    if (!homogeneity[i]) {
+      nb_plot_set++;
+      if (seq) {
+        nb_plot_set += 2;
+      }
+    }
+  }
+
+  plot_set = new MultiPlotSet(nb_plot_set , 1);
+
+  MultiPlotSet &plot = *plot_set;
+
+  plot.border = "15 lw 0";
+
+  plot.variable_nb_viewpoint[0] = 1;
+
+  // index is the position of the next plot to fill in the plot set
+
+  index = 0;
+  for (i = 0;i < nb_state;i++) {
+    if (!homogeneity[i]) {
+
+      // self-transition in state i probability function
+
+      plot.variable[index] = 0;
+      plot.viewpoint[index] = SELF_TRANSITION;
+
+      plot[index].xrange = Range(0 , self_transition[i]->max_value);
+      plot[index].yrange = Range(0. , 1.);
+
+      if (self_transition[i]->max_value < TIC_THRESHOLD) {
+        plot[index].xtics = 1;
+      }
+
+      // with data: curve 0 is the observed function and curve 1 the theoretical one;
+      // without data: the theoretical function is the only curve (j selects its slot)
+
+      if (seq) {
+        plot[index].resize(2);
+
+        legend.str("");
+        legend << STAT_label[STATL_STATE] << " " << i << " - "
+               << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_SELF_TRANSITION];
+        plot[index][0].legend = legend.str();
+
+        plot[index][0].style = "points";
+
+        seq->self_transition[i]->plotable_write(0 , plot[index][0]);
+        j = 1;
+      }
+
+      else {
+        plot[index].resize(1);
+        j = 0;
+      }
+
+      legend.str("");
+      legend << STAT_label[STATL_STATE] << " " << i << " - "
+             << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_SELF_TRANSITION];
+      plot[index][j].legend = legend.str();
+
+      plot[index][j].style = "linespoint";
+
+      self_transition[i]->plotable_write(plot[index][j]);
+      index++;
+
+      if (seq) {
+
+        // computation of the standardized residuals
+        // NOTE(review): residual_standard_deviation is used as a divisor below
+        // without a zero test - confirm it cannot be 0 for a fitted function
+
+        residual_mean = self_transition[i]->residual_mean_computation();
+        residual_standard_deviation = sqrt(self_transition[i]->residual_variance_computation(residual_mean));
+
+        standard_residual = new double[self_transition[i]->max_value + 1];
+
+        pfrequency = self_transition[i]->frequency;
+        presidual = self_transition[i]->residual;
+        min_standard_residual = 0.;
+        max_standard_residual = 0.;
+
+        // entries of standard_residual with a zero frequency are left unset;
+        // they are never read since the plotting loop below applies the same filter
+        // NOTE(review): presidual advances only when the frequency is > 0 whereas
+        // standard_residual is indexed by j - check that the residual array is
+        // really packed that way and not indexed like frequency
+
+        for (j = 0;j <= self_transition[i]->max_value;j++) {
+          if (*pfrequency++ > 0) {
+            standard_residual[j] = *presidual / residual_standard_deviation;
+            if (standard_residual[j] < min_standard_residual) {
+              min_standard_residual = standard_residual[j];
+            }
+            if (standard_residual[j] > max_standard_residual) {
+              max_standard_residual = standard_residual[j];
+            }
+            presidual++;
+          }
+        }
+
+        // standardized residuals
+
+        plot.variable[index] = 0;
+        plot.viewpoint[index] = SELF_TRANSITION;
+
+        plot[index].xrange = Range(0 , seq->self_transition[i]->length - 1);
+        if (seq->self_transition[i]->length - 1 < TIC_THRESHOLD) {
+          plot[index].xtics = 1;
+        }
+
+        plot[index].yrange = Range(min_standard_residual , max_standard_residual);
+
+        plot[index].xlabel = SEQ_label[SEQL_INDEX];
+        plot[index].ylabel = STAT_label[STATL_STANDARDIZED_RESIDUAL];
+
+        // an asymptote curve is added only if it enters the y range at the first
+        // or at the last index; the two tests below are repeated verbatim further
+        // down when the curves are filled and must be kept in sync with them
+
+        nb_plot = 1;
+        if (((1. - self_transition[i]->point[0]) / residual_standard_deviation <= max_standard_residual) ||
+            ((1. - self_transition[i]->point[seq->self_transition[i]->length - 1]) / residual_standard_deviation <=
+             max_standard_residual)) {
+          nb_plot++;
+        }
+        if ((-self_transition[i]->point[0] / residual_standard_deviation >= min_standard_residual) ||
+            (-self_transition[i]->point[seq->self_transition[i]->length - 1] / residual_standard_deviation >=
+             min_standard_residual)) {
+          nb_plot++;
+        }
+        plot[index].resize(nb_plot);
+
+        plot[index][0].style = "points";
+
+        pfrequency = self_transition[i]->frequency;
+        for (j = 0;j <= self_transition[i]->max_value;j++) {
+          if (*pfrequency++ > 0) {
+            plot[index][0].add_point(j , standard_residual[j]);
+          }
+        }
+
+        // upper asymptote: (1 - p(k)) / standard deviation
+
+        j = 1;
+        if (((1. - self_transition[i]->point[0]) / residual_standard_deviation <= max_standard_residual) ||
+            ((1. - self_transition[i]->point[seq->self_transition[i]->length - 1]) / residual_standard_deviation <=
+             max_standard_residual)) {
+          plot[index][j].legend = SEQ_label[SEQL_ASYMPTOTE];
+
+          plot[index][j].style = "lines";
+
+          for (k = 0;k <= self_transition[i]->max_value;k++) {
+            plot[index][j].add_point(k , (1. - self_transition[i]->point[k]) / residual_standard_deviation);
+          }
+          j++;
+        }
+
+        // lower asymptote: -p(k) / standard deviation
+
+        if ((-self_transition[i]->point[0] / residual_standard_deviation >= min_standard_residual) ||
+            (-self_transition[i]->point[seq->self_transition[i]->length - 1] / residual_standard_deviation >=
+             min_standard_residual)) {
+          plot[index][j].legend = SEQ_label[SEQL_ASYMPTOTE];
+
+          plot[index][j].style = "lines";
+
+          for (k = 0;k <= self_transition[i]->max_value;k++) {
+            plot[index][j].add_point(k , -self_transition[i]->point[k] / residual_standard_deviation);
+          }
+        }
+        index++;
+
+        // self-transition in state i empirical function
+
+        plot.variable[index] = 0;
+        plot.viewpoint[index] = SELF_TRANSITION;
+
+        plot[index].xrange = Range(0 , seq->self_transition[i]->length - 1);
+        max_frequency = seq->self_transition[i]->max_frequency_computation();
+        plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE));
+
+        if (seq->self_transition[i]->length - 1 < TIC_THRESHOLD) {
+          plot[index].xtics = 1;
+        }
+        if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) {
+          plot[index].ytics = 1;
+        }
+
+        plot[index].xlabel = SEQ_label[SEQL_INDEX];
+        plot[index].ylabel = STAT_label[STATL_FREQUENCY];
+
+        plot[index].resize(1);
+
+        legend.str("");
+        legend << STAT_label[STATL_STATE] << " " << i << " - "
+               << SEQ_label[SEQL_TRANSITION_COUNTS];
+        plot[index][0].legend = legend.str();
+
+        plot[index][0].style = "impulses";
+
+        seq->self_transition[i]->plotable_frequency_write(plot[index][0]);
+        index++;
+
+        // the temporary residual buffer is released once per nonhomogeneous state
+
+        delete [] standard_residual;
+      }
+    }
+  }
+
+  // the remaining plots are filled by the process, starting at position index
+
+  process->plotable_write(*plot_set , index , 0 , NULL , NULL ,
+                          characteristics , length_distribution);
+
+  return plot_set;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Plot of a NonhomogeneousMarkov object.
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* NonhomogeneousMarkov::get_plotable() const
+
+{
+  // forward to the general overload with the data attached to the model
+  return this->get_plotable(markov_data);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Number of parameters of a NonhomogeneousMarkov object.
+ *
+ * Starts from the count returned by Chain::nb_parameter_computation() and,
+ * for each nonhomogeneous state, adds the number of parameters of its
+ * self-transition function minus one.
+ *
+ * \return number of parameters.
+ */
+/*--------------------------------------------------------------*/
+
+int NonhomogeneousMarkov::nb_parameter_computation() const
+
+{
+  int nb_parameter = Chain::nb_parameter_computation();
+
+
+  for (int state = 0;state < nb_state;state++) {
+    if (homogeneity[state]) {
+      continue;   // homogeneous states contribute nothing more
+    }
+
+    nb_parameter += self_transition[state]->nb_parameter - 1;
+  }
+
+  return nb_parameter;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Default constructor of the NonhomogeneousMarkovData class:
+ *        no model, no chain data, likelihood set to D_INF.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData::NonhomogeneousMarkovData()
+:markov(NULL) , chain_data(NULL) , likelihood(D_INF)
+
+{
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the NonhomogeneousMarkovData class from
+ *        a sequence length frequency distribution.
+ *
+ * \param[in] ilength_distribution sequence length frequency distribution.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData::NonhomogeneousMarkovData(const FrequencyDistribution &ilength_distribution)
+:MarkovianSequences(ilength_distribution , 1 , NULL , false) ,
+ markov(NULL) , chain_data(NULL) , likelihood(D_INF)
+
+{
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a NonhomogeneousMarkovData object from
+ * a MarkovianSequences object.
+ *
+ * \param[in] seq reference on a MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData::NonhomogeneousMarkovData(const MarkovianSequences &seq)
+:MarkovianSequences(seq) , markov(NULL) , chain_data(NULL) , likelihood(D_INF)
+
+{
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Copy of the members specific to NonhomogeneousMarkovData
+ *        (model, chain data, likelihood).
+ *
+ * The previous values of markov and chain_data are overwritten without
+ * being deleted.
+ *
+ * \param[in] seq reference on a NonhomogeneousMarkovData object,
+ * \param[in] model_flag flag copy of the included NonhomogeneousMarkov object.
+ */
+/*--------------------------------------------------------------*/
+
+void NonhomogeneousMarkovData::copy(const NonhomogeneousMarkovData &seq , bool model_flag)
+
+{
+  // the model is duplicated without its own data (second argument false)
+  markov = (((model_flag) && (seq.markov)) ?
+            new NonhomogeneousMarkov(*(seq.markov) , false) : NULL);
+
+  chain_data = ((seq.chain_data) ? new ChainData(*(seq.chain_data)) : NULL);
+
+  likelihood = seq.likelihood;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destructor of the NonhomogeneousMarkovData class:
+ *        releases the model and the chain data.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData::~NonhomogeneousMarkovData()
+
+{
+  delete chain_data;
+  delete markov;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Assignment operator of the NonhomogeneousMarkovData class.
+ *
+ * \param[in] seq reference on a NonhomogeneousMarkovData object.
+ *
+ * \return NonhomogeneousMarkovData object.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData& NonhomogeneousMarkovData::operator=(const NonhomogeneousMarkovData &seq)
+
+{
+  if (&seq != this) {
+
+    // release everything owned by this object before copying, base classes included
+
+    delete markov;
+    delete chain_data;
+
+    remove();
+    Sequences::remove();
+
+    Sequences::copy(seq);
+    MarkovianSequences::copy(seq);
+    copy(seq);
+  }
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Extraction of a frequency distribution.
+ *
+ * The empirical distribution is taken from the characteristics of the
+ * first variable and is paired with the corresponding theoretical
+ * distribution of the model when a model is attached and provides one.
+ * An error is reported when the state is out of range, when the
+ * requested distribution type is not handled or not available, or when
+ * the frequency distribution is empty.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] histo_type frequency distribution type,
+ * \param[in] state state.
+ *
+ * \return DiscreteDistributionData object (NULL in case of error).
+ */
+/*--------------------------------------------------------------*/
+
+DiscreteDistributionData* NonhomogeneousMarkovData::extract(StatError &error , process_distribution histo_type ,
+                                                            int state) const
+
+{
+  bool status = true;
+  Distribution *pdist = NULL;
+  DiscreteParametric *pparam = NULL;
+  FrequencyDistribution *phisto = NULL;
+  DiscreteDistributionData *histo = NULL;
+
+
+  error.init();
+
+  if ((state < 0) || (state >= marginal_distribution[0]->nb_value)) {
+    status = false;
+    ostringstream error_message;
+    error_message << STAT_label[STATL_STATE] << " " << state << " "
+                  << STAT_error[STATR_NOT_PRESENT];
+    error.update((error_message.str()).c_str());
+  }
+
+  else {
+
+    // empirical distribution: phisto stays NULL when the requested type is not
+    // handled here or when the corresponding characteristics are not available
+
+    if (characteristics[0]) {
+      switch (histo_type) {
+      case FIRST_OCCURRENCE :
+        phisto = characteristics[0]->first_occurrence[state];
+        break;
+      case RECURRENCE_TIME :
+        phisto = characteristics[0]->recurrence_time[state];
+        break;
+      case SOJOURN_TIME :
+        phisto = characteristics[0]->sojourn_time[state];
+        break;
+      case FINAL_RUN :
+        phisto = characteristics[0]->final_run[state];
+        break;
+      case NB_RUN :
+        if (characteristics[0]->nb_run) {
+          phisto = characteristics[0]->nb_run[state];
+        }
+        break;
+      case NB_OCCURRENCE :
+        if (characteristics[0]->nb_occurrence) {
+          phisto = characteristics[0]->nb_occurrence[state];
+        }
+        break;
+      default :
+        break;
+      }
+    }
+
+    if ((!phisto) || (phisto->nb_element == 0)) {
+      status = false;
+      error.update(STAT_error[STATR_EMPTY_SAMPLE]);
+    }
+  }
+
+  if (status) {
+
+    // theoretical distribution: only when a model is attached and provides it
+    // (FINAL_RUN has no theoretical counterpart)
+
+    if (markov) {
+      switch (histo_type) {
+      case FIRST_OCCURRENCE :
+        if (markov->process->first_occurrence) {
+          pdist = markov->process->first_occurrence[state];
+        }
+        break;
+      case RECURRENCE_TIME :
+        if (markov->process->recurrence_time) {
+          pdist = markov->process->recurrence_time[state];
+        }
+        break;
+      case SOJOURN_TIME :
+        if (markov->process->sojourn_time) {
+          pparam = markov->process->sojourn_time[state];
+        }
+        break;
+      case NB_RUN :
+        if (markov->process->nb_run) {
+          pdist = markov->process->nb_run[state];
+        }
+        break;
+      case NB_OCCURRENCE :
+        if (markov->process->nb_occurrence) {
+          pdist = markov->process->nb_occurrence[state];
+        }
+        break;
+      default :
+        break;
+      }
+    }
+
+    if (pdist) {
+      histo = new DiscreteDistributionData(*phisto , pdist);
+    }
+    else {
+      histo = new DiscreteDistributionData(*phisto , pparam);
+    }
+  }
+
+  return histo;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Copy of a NonhomogeneousMarkovData object transforming the implicit index parameters in
+ * explicit index parameters.
+ *
+ * An error is reported when the index parameters are already explicit.
+ *
+ * \param[in] error reference on a StatError object.
+ *
+ * \return NonhomogeneousMarkovData object (NULL in case of error).
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData* NonhomogeneousMarkovData::explicit_index_parameter(StatError &error) const
+
+{
+  NonhomogeneousMarkovData *seq;
+
+
+  error.init();
+
+  if (index_parameter) {
+    seq = NULL;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  }
+  else {
+    seq = new NonhomogeneousMarkovData(*this , true , EXPLICIT_INDEX_PARAMETER);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Removing of the index parameters.
+ *
+ * \param[in] error reference on a StatError object.
+ *
+ * \return NonhomogeneousMarkovData object.
+ */
+/*--------------------------------------------------------------*/
+
+NonhomogeneousMarkovData* NonhomogeneousMarkovData::remove_index_parameter(StatError &error) const
+
+{
+  NonhomogeneousMarkovData *seq;
+
+
+  error.init();
+
+  // there is nothing to remove when no explicit index parameter is stored
+
+  if (!index_parameter) {
+    seq = NULL;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  }
+  else {
+    seq = new NonhomogeneousMarkovData(*this , true , REMOVE_INDEX_PARAMETER);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Writing of a NonhomogeneousMarkovData object.
+ *
+ * Nothing is written when no model is attached to the data.
+ *
+ * \param[in,out] os stream,
+ * \param[in] exhaustive flag detail level.
+ */
+/*--------------------------------------------------------------*/
+
+ostream& NonhomogeneousMarkovData::ascii_write(ostream &os , bool exhaustive) const
+
+{
+  if (markov) {
+    markov->ascii_write(os , this , exhaustive , false);
+  }
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Writing of a NonhomogeneousMarkovData object in a file.
+ *
+ * The error object is reset on entry in every case. false is returned
+ * without any error message when no model is attached to the data.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] path file path,
+ * \param[in] exhaustive flag detail level.
+ *
+ * \return error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool NonhomogeneousMarkovData::ascii_write(StatError &error , const string path ,
+                                           bool exhaustive) const
+
+{
+  bool status = false;
+
+
+  // reset first so that a failure never leaves the messages of a previous call
+
+  error.init();
+
+  if (markov) {
+    ofstream out_file(path.c_str());
+
+    if (!out_file) {
+      status = false;
+      error.update(STAT_error[STATR_FILE_NAME]);
+    }
+
+    else {
+      status = true;
+      markov->ascii_write(out_file , this , exhaustive , true);
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Writing of a NonhomogeneousMarkovData object in a file at the spreadsheet format.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] path file path.
+ *
+ * \return error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool NonhomogeneousMarkovData::spreadsheet_write(StatError &error , const string path) const
+
+{
+  bool status = false;
+
+
+  // reset first so that a failure never leaves the messages of a previous call;
+  // false is returned without any message when no model is attached
+
+  error.init();
+
+  if (markov) {
+    ofstream out_file(path.c_str());
+
+    if (!out_file) {
+      status = false;
+      error.update(STAT_error[STATR_FILE_NAME]);
+    }
+
+    else {
+      status = true;
+      markov->spreadsheet_write(out_file , this);
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Plot of a NonhomogeneousMarkovData object using Gnuplot.
+ *
+ * The error object is reset on entry in every case. false is returned
+ * without any error message when no model is attached to the data.
+ *
+ * \param[in] error reference on a StatError object,
+ * \param[in] prefix file prefix,
+ * \param[in] title figure title.
+ *
+ * \return error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool NonhomogeneousMarkovData::plot_write(StatError &error , const char *prefix ,
+                                          const char *title) const
+
+{
+  bool status = false;
+
+
+  error.init();
+
+  if (markov) {
+    status = markov->plot_write(prefix , title , this);
+
+    if (!status) {
+      error.update(STAT_error[STATR_FILE_PREFIX]);
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Plot of a NonhomogeneousMarkovData object.
+ *
+ * \return MultiPlotSet object.
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* NonhomogeneousMarkovData::get_plotable() const
+
+{
+  // the plots are produced by the attached model; without a model there is nothing to plot
+  if (!markov) {
+    return NULL;
+  }
+
+  return markov->get_plotable(this);
+}
+
+
+}; // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/nonhomogeneous_markov.h b/src/cpp/sequence_analysis/nonhomogeneous_markov.h
new file mode 100644
index 0000000..68787d8
--- /dev/null
+++ b/src/cpp/sequence_analysis/nonhomogeneous_markov.h
@@ -0,0 +1,250 @@
+/* -*-c++-*-
+ * ----------------------------------------------------------------------------
+ *
+ * StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ * Copyright 1995-2018 CIRAD AGAP
+ *
+ * File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ * $Source$
+ * $Id: nonhomogeneous_markov.h 3257 2007-06-06 12:56:12Z dufourko $
+ *
+ * Forum for StructureAnalysis developers:
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * GNU General Public Licence
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; see the file COPYING. If not,
+ * write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * ----------------------------------------------------------------------------
+ */
+
+
+
+#ifndef NONHOMOGENEOUS_MARKOV_H
+#define NONHOMOGENEOUS_MARKOV_H
+
+
+#include "sequences.h"
+
+
+namespace sequence_analysis {
+
+
+
+/****************************************************************
+ *
+ * Constants
+ */
+
+
+  const double START_RATIO = 0.03;       // sample proportion for the parameter initialization (beginning)
+  const double END_RATIO = 0.1;          // sample proportion for the parameter initialization (end)
+  const int REGRESSION_NB_ELEMENT = 100;  // minimum sample size for the nonlinear regression
+  const double GRADIENT_DESCENT_COEFF = 1.;  // coefficient for the gradient descent algorithm
+  const double RESIDUAL_SQUARE_SUM_DIFF = 1.e-6;  // threshold for stopping the iterations of
+                                                  // the gradient descent algorithm
+  const int REGRESSION_NB_ITER = 1000;   // number of iterations for the nonlinear regression estimation
+
+
+
+/****************************************************************
+ *
+ * Class definition
+ */
+
+
+  /// \brief Self-transition probability function
+
+  class Function : public stat_tool::RegressionKernel {
+
+  public :
+
+    double *residual;       ///< residuals
+    int *frequency;         ///< frequency for each index
+
+    void copy(const Function&);
+    void remove();
+
+    Function();
+    Function(stat_tool::parametric_function iident , int length , double *iparameter);
+    Function(stat_tool::parametric_function iident , int length);
+    Function(const Function &function);
+    ~Function();
+    Function& operator=(const Function &function);
+
+    static Function* parsing(stat_tool::StatError &error , std::ifstream &in_file , int &line ,
+                             int length , double min = 0. , double max = 1.);
+
+    std::ostream& ascii_print(std::ostream &os , bool exhaustive , bool file_flag ,
+                              const stat_tool::Curves *curves = NULL) const;
+    std::ostream& spreadsheet_print(std::ostream &os , const stat_tool::Curves *curves = NULL) const;
+    bool plot_print(const char *path , double residual_standard_deviation = stat_tool::D_DEFAULT) const;
+
+    double regression_square_sum_computation(double self_transition_mean) const;
+    void residual_computation(const SelfTransition &self_transition);
+    double residual_mean_computation() const;
+    double residual_variance_computation(double residual_mean) const;
+    double residual_square_sum_computation() const;
+  };
+
+
+  class NonhomogeneousMarkovData;
+
+  /// \brief Nonhomogeneous Markov chain
+
+  class NonhomogeneousMarkov : public stat_tool::StatInterface , protected stat_tool::Chain {
+
+    friend class MarkovianSequences;
+    friend class NonhomogeneousMarkovData;
+
+    friend std::ostream& operator<<(std::ostream &os , const NonhomogeneousMarkov &markov)
+    { return markov.ascii_write(os , markov.markov_data); }
+
+  protected :
+
+    NonhomogeneousMarkovData *markov_data;  ///< pointer on a NonhomogeneousMarkovData object
+    bool *homogeneity;      ///< state homogeneities
+    Function **self_transition;  ///< self-transition probability functions
+    CategoricalSequenceProcess *process;  ///< process holding the characteristic distributions of the states
+
+    void copy(const NonhomogeneousMarkov &markov , bool data_flag = true ,
+              bool characteristic_flag = true);
+    void remove();
+
+    std::ostream& ascii_write(std::ostream &os , const NonhomogeneousMarkovData *seq ,
+                              bool exhaustive = false , bool file_flag = false) const;
+    std::ostream& spreadsheet_write(std::ostream &os , const NonhomogeneousMarkovData *seq) const;
+    bool plot_write(const char *prefix , const char *title ,
+                    const NonhomogeneousMarkovData *seq) const;
+    stat_tool::MultiPlotSet* get_plotable(const NonhomogeneousMarkovData *seq) const;
+
+    int nb_parameter_computation() const;
+
+    void transition_update(int state , int index , stat_tool::Chain &index_chain) const;
+    void index_state_distribution();
+    void state_no_occurrence_probability(int state , double increment = LEAVE_INCREMENT);
+    void state_first_occurrence_distribution(int state , int min_nb_value = 1 ,
+                                             double cumul_threshold = stat_tool::CUMUL_THRESHOLD);
+    void state_nb_pattern_mixture(int state , count_pattern pattern);
+
+  public :
+
+    NonhomogeneousMarkov();
+    NonhomogeneousMarkov(int inb_state , stat_tool::parametric_function *ident);
+    NonhomogeneousMarkov(const stat_tool::Chain *pchain , const Function **pself_transition , int length);
+    NonhomogeneousMarkov(const NonhomogeneousMarkov &markov , bool data_flag = true ,
+                         bool characteristic_flag = true)
+    :stat_tool::Chain(markov) { copy(markov , data_flag , characteristic_flag); }
+    ~NonhomogeneousMarkov();
+    NonhomogeneousMarkov& operator=(const NonhomogeneousMarkov &markov);
+
+    stat_tool::DiscreteParametricModel* extract(stat_tool::StatError &error ,
+                                                stat_tool::process_distribution dist_type , int state) const;
+
+    static NonhomogeneousMarkov* ascii_read(stat_tool::StatError &error , const std::string path ,
+                                            int length = DEFAULT_LENGTH);
+
+    std::ostream& line_write(std::ostream &os) const;
+
+    std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const;
+    bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const;
+    bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const;
+    bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const;
+    stat_tool::MultiPlotSet* get_plotable() const;
+
+    void characteristic_computation(int length , bool counting_flag);
+    void characteristic_computation(const NonhomogeneousMarkovData &seq , bool counting_flag ,
+                                    bool length_flag = true);
+
+    double likelihood_computation(const MarkovianSequences &seq ,
+                                  int index = stat_tool::I_DEFAULT) const;
+
+    NonhomogeneousMarkovData* simulation(stat_tool::StatError &error ,
+                                         const stat_tool::FrequencyDistribution &hlength ,
+                                         bool counting_flag = true) const;
+    NonhomogeneousMarkovData* simulation(stat_tool::StatError &error , int nb_sequence ,
+                                         int length , bool counting_flag = true) const;
+    NonhomogeneousMarkovData* simulation(stat_tool::StatError &error , int nb_sequence ,
+                                         const MarkovianSequences &iseq ,
+                                         bool counting_flag = true) const;
+
+    // class member access
+
+    NonhomogeneousMarkovData* get_markov_data() const { return markov_data; }
+    bool get_homogeneity(int state) const { return homogeneity[state]; }
+    Function* get_self_transition(int state) const { return self_transition[state]; }
+    CategoricalSequenceProcess* get_process() const { return process; }
+  };
+
+
+  /// \brief Data structure corresponding to a nonhomogeneous Markov chain
+
+  class NonhomogeneousMarkovData : public MarkovianSequences {
+
+    friend class MarkovianSequences;
+    friend class NonhomogeneousMarkov;
+
+    friend std::ostream& operator<<(std::ostream &os , const NonhomogeneousMarkovData &seq)
+    { return seq.ascii_write(os , false); }
+
+  private :
+
+    NonhomogeneousMarkov *markov;  ///< pointer on a NonhomogeneousMarkov object
+    stat_tool::ChainData *chain_data;  ///< initial states and transitions
+    double likelihood;      ///< log-likelihood for the observed sequences
+
+    void copy(const NonhomogeneousMarkovData &seq , bool model_flag = true);
+
+  public :
+
+    NonhomogeneousMarkovData();
+    NonhomogeneousMarkovData(const stat_tool::FrequencyDistribution &ihlength);
+    NonhomogeneousMarkovData(const MarkovianSequences &seq);
+    NonhomogeneousMarkovData(const NonhomogeneousMarkovData &seq , bool model_flag = true ,
+                             sequence_transformation transform = SEQUENCE_COPY)
+    :MarkovianSequences(seq , transform) { copy(seq , model_flag); }
+    ~NonhomogeneousMarkovData();
+    NonhomogeneousMarkovData& operator=(const NonhomogeneousMarkovData &seq);
+
+    stat_tool::DiscreteDistributionData* extract(stat_tool::StatError &error ,
+                                                 stat_tool::process_distribution histo_type , int state) const;
+    NonhomogeneousMarkovData* explicit_index_parameter(stat_tool::StatError &error) const;
+    NonhomogeneousMarkovData* remove_index_parameter(stat_tool::StatError &error) const;
+
+    std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const;
+    bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const;
+    bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const;
+    bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const;
+    stat_tool::MultiPlotSet* get_plotable() const;
+
+    void build_transition_count();
+
+    // class member access
+
+    NonhomogeneousMarkov* get_markov() const { return markov; }
+    stat_tool::ChainData* get_chain_data() const { return chain_data; }
+    double get_likelihood() const { return likelihood; }
+  };
+
+
+}; // namespace sequence_analysis
+
+
+
+#endif
diff --git a/src/cpp/sequence_analysis/renewal.h b/src/cpp/sequence_analysis/renewal.h
new file mode 100644
index 0000000..f210b38
--- /dev/null
+++ b/src/cpp/sequence_analysis/renewal.h
@@ -0,0 +1,503 @@
+/* -*-c++-*-
+ * ----------------------------------------------------------------------------
+ *
+ * StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ * Copyright 1995-2019 CIRAD AGAP
+ *
+ * File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ * $Source$
+ * $Id$
+ *
+ * Forum for StructureAnalysis developers:
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * GNU General Public Licence
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef RENEWAL_H +#define RENEWAL_H + + +#include "stat_tool/curves.h" +#include "stat_tool/distribution.h" + + +namespace sequence_analysis { + + + +/**************************************************************** + * + * Constants + */ + + + const int DEFAULT_TIME = 20; // default observation period + const int MAX_TIME = 500; // maximum observation period + const int PLOT_NEVENT_TIME = 10; // maximum number of time to the nth event distributions + // plotted (Gnuplot output) + const int PLOT_NB_TIME = 5; // maximum number of distributions of the number of events plotted + // with the mixture of the number of events distributions (Gnuplot output) + + const double RENEWAL_THRESHOLD = 0.99999; // threshold on the cumulative distribution function for determining + // the upper bound of the support of the inter-event distribution + const double RB_THRESHOLD = 2000.; // threshold for using the fast computation of the number of events distribution + // from a binomial inter-event distribution + const double RNB_THRESHOLD = 2000.; // threshold for using the fast computation of the number of events distribution + // from a negative binomiale inter-event distribution + + enum renewal_distribution { + INTER_EVENT , + WITHIN_OBSERVATION_PERIOD , + LENGTH_BIAS , + BACKWARD_RECURRENCE_TIME , + FORWARD_RECURRENCE_TIME , + NB_EVENT , + NB_EVENT_MIXTURE + }; + + const double MIN_NB_EVENT = 0.4; 
// minimum mean number of events + const double MIN_INTER_EVENT = 1.; // minimum mean time interval between events + const double RENEWAL_INIT_PROBABILITY = 0.001; // threshold for probability initialization + const int RENEWAL_COEFF = 10; // rounding coefficient for the estimator + + const double MEAN_COEFF = 2.; // coefficient on the mean for compensating the length bias + + const int RENEWAL_NB_ELEMENT = 1000000; // maximum sample size for simulation + + + +/**************************************************************** + * + * Class definition + */ + + + /// \brief Length-biased distribution + + class LengthBias : public stat_tool::DiscreteParametric { + + public : + + LengthBias(int inb_value = 0 , stat_tool::discrete_parametric iident = stat_tool::CATEGORICAL , + int iinf_bound = stat_tool::I_DEFAULT , int isup_bound = stat_tool::I_DEFAULT , + double iparameter = stat_tool::D_DEFAULT, double iprobability = stat_tool::D_DEFAULT) + :stat_tool::DiscreteParametric(inb_value , iident , iinf_bound , isup_bound , iparameter , iprobability) {} + LengthBias(const DiscreteParametric &inter_event) + :stat_tool::DiscreteParametric(inter_event) { computation(inter_event); } + LengthBias(const LengthBias &length_bias) + :stat_tool::DiscreteParametric((DiscreteParametric&)length_bias) {} + + void computation(const stat_tool::DiscreteParametric&); + }; + + + /// \brief Backward recurrence time distribution + + class Backward : public stat_tool::DiscreteParametric { + + public : + + Backward(int inb_value = 0 , stat_tool::discrete_parametric iident = stat_tool::CATEGORICAL , + int iinf_bound = stat_tool::I_DEFAULT , int isup_bound = stat_tool::I_DEFAULT , + double iparameter = stat_tool::D_DEFAULT, double iprobability = stat_tool::D_DEFAULT) + :stat_tool::DiscreteParametric(inb_value , iident , iinf_bound , isup_bound , iparameter , iprobability) {} + Backward(const Backward &dist , int ialloc_nb_value = stat_tool::I_DEFAULT) + :stat_tool::DiscreteParametric(dist , 
stat_tool::DISTRIBUTION_COPY , ialloc_nb_value) {} + + void computation(const stat_tool::DiscreteParametric &inter_event , const stat_tool::Distribution &time); + }; + + + /// \brief Number of events distribution + + class NbEvent : public stat_tool::DiscreteParametric { + + public : + + stat_tool::process_type type; ///< renewal process type (ORDINARY/EQUILIBRIUM) + int time; ///< observation period + + NbEvent(stat_tool::process_type itype = stat_tool::EQUILIBRIUM , int itime = 0 , int inb_value = 0 , + stat_tool::discrete_parametric iident = stat_tool::CATEGORICAL , + int iinf_bound = stat_tool::I_DEFAULT , int isup_bound = stat_tool::I_DEFAULT , + double iparameter = stat_tool::D_DEFAULT , double iprobability = stat_tool::D_DEFAULT); + NbEvent(stat_tool::process_type itype , int itime , stat_tool::DiscreteParametric &inter_event); + NbEvent(const NbEvent &nb_event , int ialloc_nb_value = stat_tool::I_DEFAULT); + + void binomial_computation(); + void negative_binomial_computation(); + + void ordinary_computation(stat_tool::DiscreteParametric &inter_event); + void computation(stat_tool::DiscreteParametric &inter_event); + }; + + + class RenewalIterator; + class TimeEvents; + class RenewalData; + + /// \brief Renewal process + + class Renewal : public stat_tool::StatInterface { + + friend class RenewalIterator; + friend class TimeEvents; + friend class RenewalData; + + friend std::ostream& operator<<(std::ostream &os , const Renewal &renew) + { return renew.ascii_write(os , renew.renewal_data , false , false); } + + private : + + int nb_iterator; ///< number of iterators pointing on the Renewal object + RenewalData *renewal_data; ///< pointer on a RenewalData object + stat_tool::process_type type; ///< renewal process type (ORDINARY/EQUILIBRIUM) + int nb_event_max; ///< maximum number of events + stat_tool::Distribution *time; ///< observation period distribution + stat_tool::DiscreteParametric *inter_event; ///< inter-event distribution + LengthBias *length_bias; 
///< length-biased distribution + Backward *backward; ///< backward recurrence time distribution + stat_tool::Forward *forward; ///< forward recurrence time distribution + stat_tool::DiscreteParametric **nevent_time; ///< time to the nth event distributions + NbEvent **nb_event; ///< number of events distributions for the different observation periods + stat_tool::Distribution *mixture; ///< mixture of the number of events distributions + stat_tool::Curves *index_event; ///< no-event/event probabilities as a function of time + + void init(int inf_bound , int sup_bound , double parameter , double probability); + void init(stat_tool::discrete_parametric ident , int inf_bound , int sup_bound , + double parameter , double probability); + void copy(const Renewal &renew , bool data_flag = true); + void remove(); + void type_init(stat_tool::process_type itype); + + std::ostream& ascii_write(std::ostream &os , const RenewalData *timev , + bool exhaustive , bool file_flag) const; + std::ostream& spreadsheet_write(std::ostream &os , const RenewalData *timev) const; + bool plot_write(const char *prefix , const char *title , + const RenewalData *timev) const; + stat_tool::MultiPlotSet* get_plotable(const RenewalData *timev) const; + + void index_event_computation(); + + void expectation_step(const TimeEvents &timev , stat_tool::Reestimation *reestim) const; + void expectation_step(const TimeEvents &timev , stat_tool::Reestimation *inter_event_reestim , + stat_tool::Reestimation *length_bias_reestim , + stat_tool::censoring_estimator estimator , bool combination = false , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED) const; + + public : + + Renewal(); + Renewal(stat_tool::process_type itype , const stat_tool::FrequencyDistribution &htime , + const stat_tool::DiscreteParametric &iinter_event); + Renewal(stat_tool::process_type itype , const stat_tool::Distribution &itime , + const stat_tool::DiscreteParametric &iinter_event); + 
Renewal(const RenewalData &irenewal_data , + const stat_tool::DiscreteParametric &iinter_event); + Renewal(const Renewal &renew , bool data_flag = true) + { copy(renew , data_flag); } + ~Renewal(); + void conditional_delete(); + Renewal& operator=(const Renewal &renew); + + stat_tool::DiscreteParametricModel* extract(stat_tool::StatError &error , + renewal_distribution dist_type , + int itime = stat_tool::I_DEFAULT) const; + + static Renewal* build(stat_tool::StatError &error , const stat_tool::DiscreteParametric &inter_event , + stat_tool::process_type type = stat_tool::EQUILIBRIUM , int time = DEFAULT_TIME); + + static Renewal* ascii_read(stat_tool::StatError& error , const std::string path , + stat_tool::process_type type = stat_tool::EQUILIBRIUM , int time = DEFAULT_TIME , + double cumul_threshold = RENEWAL_THRESHOLD); + + std::ostream& line_write(std::ostream &os) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + void computation(bool inter_event_flag = true , stat_tool::process_type itype = stat_tool::DEFAULT_TYPE , + const stat_tool::Distribution *dtime = NULL); + + double likelihood_computation(const TimeEvents &timev) const; + + RenewalData* simulation(stat_tool::StatError &error , stat_tool::process_type itype , + const stat_tool::FrequencyDistribution &ihtime) const; + RenewalData* simulation(stat_tool::StatError &error , stat_tool::process_type itype , + int nb_element , int itime) const; + RenewalData* simulation(stat_tool::StatError &error , stat_tool::process_type itype , + int nb_element , const TimeEvents &itimev) const; + + // class member access + + int 
get_nb_iterator() const { return nb_iterator; } + RenewalData* get_renewal_data() const { return renewal_data; } + stat_tool::process_type get_type() const { return type; } + stat_tool::Distribution* get_time() const { return time; } + stat_tool::DiscreteParametric* get_inter_event() const { return inter_event; } + LengthBias* get_length_bias() const { return length_bias; } + Backward* get_backward() const { return backward; } + stat_tool::Forward* get_forward() const { return forward; } + stat_tool::DiscreteParametric* get_nevent_time(int inb_event) const { return nevent_time[inb_event]; } + NbEvent* get_nb_event(int itime) const { return nb_event[itime]; } + stat_tool::Distribution* get_mixture() const { return mixture; } + stat_tool::Curves* get_index_event() const { return index_event; } + }; + + + /// \brief Renewal process iterator for asynchronous simulation + + class RenewalIterator { + + private : + + Renewal *renewal; ///< pointer on a Renewal object + int interval; ///< time interval between events + int counter; ///< counter + int length; ///< sequence length + int *sequence; ///< sequence of events + + void copy(const RenewalIterator &iter); + + public : + + RenewalIterator(Renewal *irenewal , int ilength = 1); + RenewalIterator(const RenewalIterator &iter) + { copy(iter); } + ~RenewalIterator(); + RenewalIterator& operator=(const RenewalIterator &iter); + + void simulation(int ilength = 1 , stat_tool::process_type type = stat_tool::DEFAULT_TYPE); + + // class member access + + Renewal* get_renewal() const { return renewal; } + int get_interval() const { return interval; } + int get_counter() const { return counter; } + int get_length() const { return length; } + int get_sequence(int index) const { return sequence[index]; } + }; + + + /// \brief Triplets {observation period, number of events, frequency} + + class TimeEvents : public stat_tool::StatInterface { + + friend class stat_tool::FrequencyDistribution; + friend class Renewal; + + friend 
std::ostream& operator<<(std::ostream &os , const TimeEvents &timev) + { return timev.ascii_write(os , true); } + + protected : + + int nb_element; ///< sample size + int nb_class; ///< number of classes + int *time; ///< observation period + int *nb_event; ///< number of events + int *frequency; ///< frequency of each pair {observation period, number of events} + stat_tool::FrequencyDistribution *htime; ///< observation period frequency distribution + stat_tool::FrequencyDistribution **hnb_event; ///< number of events frequency distributions for the different observation periods + stat_tool::FrequencyDistribution *mixture; ///< mixture of the number of events frequency distributions + + void build_frequency_distribution(); + void build_sample(); + void build(int inb_element , int *itime , int *inb_event); + void copy(const TimeEvents&); + void merge(int nb_sample , const TimeEvents **ptimev); + void remove(); + + std::ostream& ascii_write(std::ostream &os , bool exhaustive , stat_tool::process_type type) const; + std::ostream& ascii_file_write(std::ostream &os , bool exhaustive , + stat_tool::process_type type = stat_tool::EQUILIBRIUM) const; + std::ostream& spreadsheet_write(std::ostream &os , + stat_tool::process_type type = stat_tool::EQUILIBRIUM) const; + + void nb_element_computation(); + double min_inter_event_computation() const; + + public : + + TimeEvents(int inb_class = 0); + TimeEvents(int inb_element , int *itime , int *inb_event) + { build(inb_element , itime , inb_event); } + TimeEvents(stat_tool::FrequencyDistribution &inb_event , int itime); + TimeEvents(int nb_sample , const TimeEvents **ptimev) { merge(nb_sample , ptimev); } + TimeEvents(const TimeEvents &timev) { copy(timev); } + ~TimeEvents(); + TimeEvents& operator=(const TimeEvents &timev); + + TimeEvents* merge(int nb_sample , const std::vector &itimev) const; + stat_tool::DiscreteDistributionData* extract(stat_tool::StatError &error , + renewal_distribution histo_type , + int itime = 
stat_tool::I_DEFAULT) const; + + TimeEvents* time_scaling(stat_tool::StatError &error , int scaling_coeff) const; + TimeEvents* time_select(stat_tool::StatError &error , int min_time , + int max_time) const; + TimeEvents* nb_event_select(stat_tool::StatError &error , int min_nb_event , + int max_nb_event) const; + + static TimeEvents* build(stat_tool::StatError &error , stat_tool::FrequencyDistribution &nb_event , int itime); + static TimeEvents* build(stat_tool::StatError &error , const std::vector > &time_nb_event); + + static TimeEvents* ascii_read(stat_tool::StatError &error , const std::string path); + static TimeEvents* old_ascii_read(stat_tool::StatError &error , const std::string path); + + std::ostream& line_write(std::ostream &os) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = true) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = true) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + double information_computation() const; + + Renewal* estimation(stat_tool::StatError &error , std::ostream *os , stat_tool::process_type type , + const stat_tool::DiscreteParametric &iinter_event , + stat_tool::estimation_criterion estimator = stat_tool::LIKELIHOOD , + int nb_iter = stat_tool::I_DEFAULT , + stat_tool::censoring_estimator equilibrium_estimator = stat_tool::COMPLETE_LIKELIHOOD , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED , + double weight = stat_tool::D_DEFAULT , + stat_tool::penalty_type pen_type = stat_tool::SECOND_DIFFERENCE , + stat_tool::side_effect outside = stat_tool::ZERO) const; + Renewal* estimation(stat_tool::StatError &error , std::ostream *os , stat_tool::process_type type , + stat_tool::estimation_criterion estimator = 
stat_tool::LIKELIHOOD , + int nb_iter = stat_tool::I_DEFAULT , + stat_tool::censoring_estimator equilibrium_estimator = stat_tool::COMPLETE_LIKELIHOOD , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED , + double weight = stat_tool::D_DEFAULT , + stat_tool::penalty_type pen_type = stat_tool::SECOND_DIFFERENCE , + stat_tool::side_effect outside = stat_tool::ZERO) const; + + // class member access + + int get_nb_element() const { return nb_element; } + int get_nb_class() const { return nb_class; } + stat_tool::FrequencyDistribution* get_htime() const { return htime; } + stat_tool::FrequencyDistribution* get_hnb_event(int itime) const { return hnb_event[itime]; } + stat_tool::FrequencyDistribution* get_mixture() const { return mixture; } + }; + + + /// \brief Data structure corresponding to a renewal process + + class RenewalData : public TimeEvents { + + friend class Renewal; + friend class Sequences; + + friend std::ostream& operator<<(std::ostream &os , RenewalData &timev) + { return timev.ascii_write(os , false); } + + private : + + Renewal *renewal; ///< pointer on a Renewal object + stat_tool::process_type type; ///< renewal process type (ORDINARY/EQUILIBRIUM) + int *length; ///< sequence length + int **sequence; ///< sequences of events + stat_tool::FrequencyDistribution *inter_event; ///< inter-event frequency distribution + stat_tool::FrequencyDistribution *within; ///< frequency distribution of time intervals between events within the observation period + stat_tool::FrequencyDistribution *length_bias; ///< length-biased frequency distribution + stat_tool::FrequencyDistribution *backward; ///< backward recurrence time frequency distribution + stat_tool::FrequencyDistribution *forward; ///< forward recurrence time frequency distribution + stat_tool::Curves *index_event; ///< empirical no-event/event probabilities as a function of time + + void copy(const RenewalData &timev , bool model_flag = true); + void remove(); + + 
std::ostream& ascii_write(std::ostream &os , bool exhaustive , bool file_flag) const; + std::ostream& spreadsheet_write(std::ostream &os) const; + + void build_index_event(int offset = 1); + + public : + + RenewalData(); + RenewalData(int nb_element , int itime); + RenewalData(stat_tool::process_type itype , const Renewal &renew); + RenewalData(const TimeEvents &timev , stat_tool::process_type itype); + RenewalData(int nb_sample , const RenewalData **itimev); + RenewalData(const RenewalData &timev , bool model_flag = true) + :TimeEvents(timev) { copy(timev , model_flag); } + ~RenewalData(); + RenewalData& operator=(const RenewalData&); + + RenewalData* merge(stat_tool::StatError &error , int nb_sample , const RenewalData **itimev) const; + RenewalData* merge(stat_tool::StatError &error , int nb_sample , const std::vector &itimev) const; + stat_tool::DiscreteDistributionData* extract(stat_tool::StatError &error , + renewal_distribution histo_type , + int itime = stat_tool::I_DEFAULT) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + Renewal* estimation(stat_tool::StatError &error , std::ostream *os , + const stat_tool::DiscreteParametric &iinter_event , + stat_tool::estimation_criterion estimator = stat_tool::LIKELIHOOD , + int nb_iter = stat_tool::I_DEFAULT , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED , + double weight = stat_tool::D_DEFAULT , + stat_tool::penalty_type pen_type = stat_tool::SECOND_DIFFERENCE , + stat_tool::side_effect outside = stat_tool::ZERO) const; + Renewal* estimation(stat_tool::StatError &error , std::ostream *os , + 
stat_tool::estimation_criterion estimator = stat_tool::LIKELIHOOD , + int nb_iter = stat_tool::I_DEFAULT , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED , + double weight = stat_tool::D_DEFAULT , + stat_tool::penalty_type pen_type = stat_tool::SECOND_DIFFERENCE , + stat_tool::side_effect outside = stat_tool::ZERO) const; + + // class member access + + Renewal* get_renewal() const { return renewal; } + stat_tool::process_type get_type() const { return type; } + int get_length(int index_seq) const { return length[index_seq]; } + int get_sequence(int index_seq , int index) const + { return sequence[index_seq][index]; } + stat_tool::FrequencyDistribution* get_inter_event() const { return inter_event; } + stat_tool::FrequencyDistribution* get_within() const { return within; } + stat_tool::FrequencyDistribution* get_length_bias() const { return length_bias; } + stat_tool::FrequencyDistribution* get_backward() const { return backward; } + stat_tool::FrequencyDistribution* get_forward() const { return forward; } + stat_tool::Curves* get_index_event() const { return index_event; } + }; + + +}; // namespace sequence_analysis + + + + +#endif diff --git a/src/cpp/sequence_analysis/renewal1.cpp b/src/cpp/sequence_analysis/renewal1.cpp new file mode 100644 index 0000000..1c65af4 --- /dev/null +++ b/src/cpp/sequence_analysis/renewal1.cpp @@ -0,0 +1,882 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General 
Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include + +#include +#include + +#include "stat_tool/stat_label.h" + +#include "renewal.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the NbEvent class. + * + * \param[in] itype renewal process type (ORDINARY/EQUILIBRIUM), + * \param[in] itime observation period, + * \param[in] inb_value number of values, + * \param[in] iident distribution identifier, + * \param[in] iinf_bound lower bound, + * \param[in] isup_bound upper bound (binomial, uniform), + * \param[in] iparameter parameter (Poisson, negative binomial), + * \param[in] iprobability probability (binomial, negative binomial). 
+ */
+/*--------------------------------------------------------------*/
+
+NbEvent::NbEvent(process_type itype , int itime , int inb_value , discrete_parametric iident ,
+                 int iinf_bound , int isup_bound , double iparameter , double iprobability)
+:DiscreteParametric(inb_value , iident , iinf_bound , isup_bound , iparameter , iprobability) ,
+ type(itype) , time(itime)
+
+{
+  // nothing left to do: the distribution description is forwarded to the base class
+  // and the two NbEvent members are set in the initializer list
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a number of events distribution from an inter-event distribution.
+ *
+ *  Distribution::init() is called with a value derived from the observation period
+ *  and the inter-event offset (presumably the number of values), the parametric
+ *  description (identifier, bounds, parameter, probability) is copied from
+ *  the inter-event distribution, then ordinary_computation() or computation()
+ *  is called depending on the process type. For any other process type,
+ *  neither Distribution::init() nor a computation function is called.
+ *
+ *  \param[in] itype       renewal process type (ORDINARY/EQUILIBRIUM),
+ *  \param[in] itime       observation period,
+ *  \param[in] inter_event reference on a DiscreteParametric object
+ *                         (its offset is used as a divisor).
+ */
+/*--------------------------------------------------------------*/
+
+NbEvent::NbEvent(process_type itype , int itime , DiscreteParametric &inter_event)
+:type(itype) , time(itime)
+
+{
+  const bool ordinary = (type == ORDINARY);
+  const bool equilibrium = (type == EQUILIBRIUM);
+
+
+  if (ordinary) {
+    Distribution::init(time / inter_event.offset + 1);
+  }
+  else if (equilibrium) {
+    Distribution::init((time - 1) / inter_event.offset + 2);
+  }
+
+  // parametric description taken over from the inter-event distribution
+  ident = inter_event.ident;
+
+  inf_bound = inter_event.inf_bound;
+  sup_bound = inter_event.sup_bound;
+  parameter = inter_event.parameter;
+  probability = inter_event.probability;
+
+  if (ordinary) {
+    ordinary_computation(inter_event);
+  }
+  else if (equilibrium) {
+    computation(inter_event);
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor by copy of the NbEvent class.
+ *
+ *  \param[in] nb_event        reference on a NbEvent object,
+ *  \param[in] ialloc_nb_value number of allocated values.
+ */
+/*--------------------------------------------------------------*/
+
+NbEvent::NbEvent(const NbEvent &nb_event , int ialloc_nb_value)
+:DiscreteParametric(nb_event , DISTRIBUTION_COPY , ialloc_nb_value) ,
+ type(nb_event.type) , time(nb_event.time)
+
+{
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Change of the renewal process type (ORDINARY/EQUILIBRIUM).
+ *
+ *  When the requested type differs from the current one, it is stored and
+ *  propagated to the number of events distribution of every observation period
+ *  of strictly positive mass. Nothing is done when the type is unchanged.
+ *
+ *  \param[in] itype renewal process type.
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::type_init(process_type itype)
+
+{
+  if (itype == type) {
+    return;
+  }
+
+  type = itype;
+
+  for (int period = time->offset;period < time->nb_value;period++) {
+    if (time->mass[period] > 0.) {
+      nb_event[period]->type = itype;
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Initialization of the inter-event distribution parameters.
+ *
+ *  The same parameters are forwarded to the inter-event, length-biased, backward
+ *  and forward recurrence time distributions and to the number of events
+ *  distribution of every observation period of strictly positive mass.
+ *
+ *  \param[in] inf_bound   lower bound,
+ *  \param[in] sup_bound   upper bound (binomial, uniform),
+ *  \param[in] parameter   parameter (Poisson, negative binomial),
+ *  \param[in] probability probability (binomial, negative binomial).
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::init(int inf_bound , int sup_bound , double parameter , double probability)
+
+{
+  inter_event->init(inf_bound , sup_bound , parameter , probability);
+  length_bias->init(inf_bound , sup_bound , parameter , probability);
+  backward->init(inf_bound , sup_bound , parameter , probability);
+  forward->init(inf_bound , sup_bound , parameter , probability);
+
+  for (int period = time->offset;period < time->nb_value;period++) {
+    if (!(time->mass[period] > 0.)) {
+      continue;   // no number of events distribution is updated for this observation period
+    }
+
+    nb_event[period]->init(inf_bound , sup_bound , parameter , probability);
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Initialization of the identifier and parameters of the inter-event distribution.
+ * + * \param[in] ident distribution identifier, + * \param[in] inf_bound lower bound, + * \param[in] sup_bound upper bound (binomial, uniform), + * \param[in] parameter parameter (Poisson, negative binomial), + * \param[in] probability probability (binomial, negative binomial). + */ +/*--------------------------------------------------------------*/ + +void Renewal::init(discrete_parametric ident , int inf_bound , int sup_bound , + double parameter , double probability) + +{ + int i; + + + inter_event->init(ident , inf_bound , sup_bound , parameter , probability); + length_bias->init(ident , inf_bound , sup_bound , parameter , probability); + backward->init(ident , inf_bound , sup_bound , parameter , probability); + forward->init(ident , inf_bound , sup_bound , parameter , probability); + + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + nb_event[i]->init(ident , inf_bound , sup_bound , parameter , probability); + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the Renewal class. + */ +/*--------------------------------------------------------------*/ + +Renewal::Renewal() + +{ + nb_iterator = 0; + renewal_data = NULL; + + type = ORDINARY; + + time = NULL; + + inter_event = NULL; + length_bias = NULL; + backward = NULL; + forward = NULL; + + nb_event_max = 0; + nevent_time = NULL; + + nb_event = NULL; + mixture = NULL; + + index_event = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Renewal class. + * + * \param[in] itype renewal process type (ORDINARY/EQUILIBRIUM), + * \param[in] htime reference on the observation period frequency distribution, + * \param[in] iinter_event reference on on the inter-event distribution. 
+ */
+/*--------------------------------------------------------------*/
+
+Renewal::Renewal(process_type itype , const FrequencyDistribution &htime ,
+                 const DiscreteParametric &iinter_event)
+
+{
+  int i;
+  // nb_value is only assigned inside the loop on the observation periods (strictly
+  // positive mass and process type handled by the switch) but is read afterwards:
+  // it is initialized so that mixture, nb_event_max and nevent_time are always
+  // built from a defined value
+  int nb_value = 0;
+
+
+  nb_iterator = 0;
+  renewal_data = NULL;
+
+  type = itype;
+
+  time = new Distribution(htime);
+
+  // the inter-event distribution is copied with an allocation enlarged by NB_VALUE_COEFF;
+  // the derived distributions are built from this allocation size
+  inter_event = new DiscreteParametric(iinter_event , DISTRIBUTION_COPY ,
+                                       (int)(iinter_event.nb_value * NB_VALUE_COEFF));
+  length_bias = new LengthBias(inter_event->alloc_nb_value , inter_event->ident ,
+                               inter_event->inf_bound , inter_event->sup_bound ,
+                               inter_event->parameter , inter_event->probability);
+  backward = new Backward(inter_event->alloc_nb_value - 1 , inter_event->ident ,
+                          inter_event->inf_bound , inter_event->sup_bound ,
+                          inter_event->parameter , inter_event->probability);
+  forward = new Forward(inter_event->alloc_nb_value , inter_event->ident ,
+                        inter_event->inf_bound , inter_event->sup_bound ,
+                        inter_event->parameter , inter_event->probability);
+
+  // one number of events distribution per observation period of strictly positive mass,
+  // NULL elsewhere
+  nb_event = new NbEvent*[time->nb_value];
+
+  for (i = 0;i < time->offset;i++) {
+    nb_event[i] = NULL;
+  }
+  for (i = time->offset;i < time->nb_value;i++) {
+    if (time->mass[i] > 0.) {
+      switch (type) {
+      case ORDINARY :
+        nb_value = i / inter_event->offset + 1;
+        break;
+      case EQUILIBRIUM :
+        nb_value = (i - 1) / inter_event->offset + 2;
+        break;
+      }
+
+      nb_event[i] = new NbEvent(type , i , nb_value , inter_event->ident ,
+                                inter_event->inf_bound , inter_event->sup_bound ,
+                                inter_event->parameter , inter_event->probability);
+    }
+
+    else {
+      nb_event[i] = NULL;
+    }
+  }
+
+  // nb_value now holds the value computed for the last observation period of positive mass
+  mixture = new Distribution(nb_value);
+
+  nb_event_max = nb_value - 1;
+  nevent_time = new DiscreteParametric*[nb_event_max + 1];
+  for (i = 0;i <= nb_event_max;i++) {
+    nevent_time[i] = NULL;
+  }
+
+  index_event = new Curves(2 , time->nb_value);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the Renewal class.
+ * + * \param[in] itype renewal process type, + * \param[in] itime reference on the observation distribution, + * \param[in] iinter_event reference on the inter-event distribution. + */ +/*--------------------------------------------------------------*/ + +Renewal::Renewal(process_type itype , const Distribution &itime , + const DiscreteParametric &iinter_event) + +{ + int i; + int nb_value; + + + nb_iterator = 0; + renewal_data = NULL; + + type = itype; + + time = new Distribution(itime); + + inter_event = new DiscreteParametric(iinter_event , NORMALIZATION); + length_bias = new LengthBias(*inter_event); + backward = new Backward(inter_event->nb_value - 1 , inter_event->ident , + inter_event->inf_bound , inter_event->sup_bound , + inter_event->parameter , inter_event->probability); + forward = new Forward(*inter_event); + + nb_event = new NbEvent*[time->nb_value]; + + for (i = 0;i < time->offset;i++) { + nb_event[i] = NULL; + } + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + switch (type) { + case ORDINARY : + nb_value = i / inter_event->offset + 1; + break; + case EQUILIBRIUM : + nb_value = (i - 1) / inter_event->offset + 2; + break; + } + + nb_event[i] = new NbEvent(type , i , nb_value , inter_event->ident , + inter_event->inf_bound , inter_event->sup_bound , + inter_event->parameter , inter_event->probability); + } + + else { + nb_event[i] = NULL; + } + } + + mixture = new Distribution(nb_value); + + nb_event_max = nb_value - 1; + nevent_time = new DiscreteParametric*[nb_event_max + 1]; + for (i = 0;i <= nb_event_max;i++) { + nevent_time[i] = NULL; + } + + index_event = new Curves(2 , time->nb_value); + + computation(false); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Renewal class. + * + * \param[in] irenewal_data reference on a RenewalData object, + * \param[in] iinter_event reference on the inter-event distribution. 
+ */
+/*--------------------------------------------------------------*/
+
+Renewal::Renewal(const RenewalData &irenewal_data , const DiscreteParametric &iinter_event)
+
+{
+  int i;
+  // nb_value is only assigned inside the loop on the observation periods of strictly
+  // positive mass but is read afterwards: it is initialized so that mixture,
+  // nb_event_max and nevent_time are always built from a defined value
+  int nb_value = 0;
+
+
+  nb_iterator = 0;
+
+  // the data are copied and both the copy and the model are forced to the EQUILIBRIUM type
+  renewal_data = new RenewalData(irenewal_data);
+  renewal_data->type = EQUILIBRIUM;
+
+  type = EQUILIBRIUM;
+
+  time = new Distribution(*(renewal_data->htime));
+
+  inter_event = new DiscreteParametric(iinter_event);
+  length_bias = new LengthBias(*inter_event);
+  backward = new Backward(inter_event->nb_value - 1 , inter_event->ident ,
+                          inter_event->inf_bound , inter_event->sup_bound ,
+                          inter_event->parameter , inter_event->probability);
+  forward = new Forward(*inter_event);
+
+  // one number of events distribution per observation period of strictly positive mass,
+  // NULL elsewhere
+  nb_event = new NbEvent*[time->nb_value];
+
+  for (i = 0;i < time->offset;i++) {
+    nb_event[i] = NULL;
+  }
+  for (i = time->offset;i < time->nb_value;i++) {
+    if (time->mass[i] > 0.) {
+      switch (type) {
+      case ORDINARY :
+        nb_value = i / inter_event->offset + 1;
+        break;
+      case EQUILIBRIUM :
+        nb_value = (i - 1) / inter_event->offset + 2;
+        break;
+      }
+
+      nb_event[i] = new NbEvent(type , i , nb_value , inter_event->ident ,
+                                inter_event->inf_bound , inter_event->sup_bound ,
+                                inter_event->parameter , inter_event->probability);
+    }
+
+    else {
+      nb_event[i] = NULL;
+    }
+  }
+
+  // nb_value now holds the value computed for the last observation period of positive mass
+  mixture = new Distribution(nb_value);
+
+  nb_event_max = nb_value - 1;
+  nevent_time = new DiscreteParametric*[nb_event_max + 1];
+  for (i = 0;i <= nb_event_max;i++) {
+    nevent_time[i] = NULL;
+  }
+
+  index_event = new Curves(2 , time->nb_value);
+
+  // computation() is called with inter_event_flag set to false
+  computation(false);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a Renewal object.
+ *
+ *  \param[in] renew     reference on a Renewal object,
+ *  \param[in] data_flag flag copy of the included RenewalData object.
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::copy(const Renewal &renew , bool data_flag)
+
+{
+  // iterators are attached to one object: the copy starts without any
+  nb_iterator = 0;
+
+  renewal_data = ((data_flag) && (renew.renewal_data)) ? new RenewalData(*(renew.renewal_data)) : NULL;
+
+  type = renew.type;
+
+  time = new Distribution(*(renew.time));
+
+  inter_event = new DiscreteParametric(*(renew.inter_event));
+  length_bias = new LengthBias(*(renew.length_bias));
+  backward = new Backward(*(renew.backward) , renew.backward->alloc_nb_value);
+  forward = new Forward(*(renew.forward));
+
+  // time to the nth event distributions (entry 0 is always NULL)
+  nb_event_max = renew.mixture->nb_value - 1;
+
+  nevent_time = new DiscreteParametric*[nb_event_max + 1];
+
+  nevent_time[0] = NULL;
+  for (int rank = 1;rank <= nb_event_max;rank++) {
+    nevent_time[rank] = (renew.nevent_time[rank] ? new DiscreteParametric(*(renew.nevent_time[rank])) : NULL);
+  }
+
+  // number of events distributions: one per observation period of strictly positive mass
+  nb_event = new NbEvent*[time->nb_value];
+
+  for (int period = 0;period < time->offset;period++) {
+    nb_event[period] = NULL;
+  }
+  for (int period = time->offset;period < time->nb_value;period++) {
+    nb_event[period] = (time->mass[period] > 0. ? new NbEvent(*(renew.nb_event[period])) : NULL);
+  }
+
+  mixture = new Distribution(*(renew.mixture));
+
+  index_event = new Curves(*(renew.index_event));
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Release of everything owned by a Renewal object.
+ *
+ *  The observation period distribution is deleted last since it drives
+ *  the loop on the number of events distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::remove()
+
+{
+  delete renewal_data;
+
+  delete inter_event;
+  delete length_bias;
+  delete backward;
+  delete forward;
+
+  if (nevent_time) {
+    for (int rank = 1;rank <= nb_event_max;rank++) {
+      delete nevent_time[rank];
+    }
+    delete [] nevent_time;
+  }
+
+  if (nb_event) {
+    for (int period = time->offset;period < time->nb_value;period++) {
+      if (time->mass[period] > 0.) {
+        delete nb_event[period];
+      }
+    }
+    delete [] nb_event;
+  }
+
+  delete mixture;
+
+  delete index_event;
+
+  delete time;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the Renewal class (delegates to remove()).
+ */
+/*--------------------------------------------------------------*/
+
+Renewal::~Renewal()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Deletion of a Renewal object unless iterators still point to it
+ *         (the object is deleted only when nb_iterator is 0).
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::conditional_delete()
+
+{
+  if (nb_iterator != 0) {
+    return;
+  }
+
+  delete this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the Renewal class.
+ *
+ *  The assignment is ignored in case of self-assignment or when iterators
+ *  point to the object (nb_iterator different from 0).
+ *
+ *  \param[in] renew reference on a Renewal object.
+ *
+ *  \return          Renewal object.
+ */
+/*--------------------------------------------------------------*/
+
+Renewal& Renewal::operator=(const Renewal &renew)
+
+{
+  if ((&renew == this) || (nb_iterator != 0)) {
+    return *this;
+  }
+
+  remove();
+  copy(renew);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of a distribution.
+ *
+ *  \param[in] error     reference on a StatError object,
+ *  \param[in] dist_type distribution type,
+ *  \param[in] itime     observation period.
+ *
+ *  \return              DiscreteParametricModel object.
+ */
+/*--------------------------------------------------------------*/
+
+DiscreteParametricModel* Renewal::extract(StatError &error , renewal_distribution dist_type ,
+                                          int itime) const
+
+{
+  Distribution *pdist;
+  DiscreteParametric *pparam;
+  DiscreteParametricModel *dist;
+  FrequencyDistribution *phisto;
+
+
+  // NULL is returned whenever nothing is extracted: invalid observation period for
+  // NB_EVENT, or distribution type handled by none of the cases below
+  // (dist was previously returned uninitialized in the latter case)
+  dist = NULL;
+
+  if (dist_type == NB_EVENT) {
+    error.init();
+
+    // itime shall be an observation period of strictly positive mass
+    if ((itime < time->offset) || (itime >= time->nb_value) || (time->mass[itime] == 0.)) {
+      error.update(SEQ_error[SEQR_OBSERVATION_TIME]);
+    }
+    else {
+      dist = new DiscreteParametricModel(*((Distribution*)nb_event[itime]) ,
+                                         (renewal_data ? renewal_data->hnb_event[itime] : NULL));
+    }
+  }
+
+  else {
+    pdist = NULL;
+    pparam = NULL;
+
+    // model side: the inter-event distribution is kept as a DiscreteParametric object,
+    // the other distributions are handled through a Distribution pointer
+    switch (dist_type) {
+    case INTER_EVENT :
+      pparam = inter_event;
+      break;
+    case LENGTH_BIAS :
+      pdist = length_bias;
+      break;
+    case BACKWARD_RECURRENCE_TIME :
+      pdist = backward;
+      break;
+    case FORWARD_RECURRENCE_TIME :
+      pdist = forward;
+      break;
+    case NB_EVENT_MIXTURE :
+      pdist = mixture;
+      break;
+    default :
+      break;
+    }
+
+    // data side: matching frequency distribution, if data are attached to the model
+    phisto = NULL;
+    if (renewal_data) {
+      switch (dist_type) {
+      case INTER_EVENT :
+        phisto = renewal_data->inter_event;
+        break;
+      case LENGTH_BIAS :
+        phisto = renewal_data->length_bias;
+        break;
+      case BACKWARD_RECURRENCE_TIME :
+        phisto = renewal_data->backward;
+        break;
+      case FORWARD_RECURRENCE_TIME :
+        phisto = renewal_data->forward;
+        break;
+      case NB_EVENT_MIXTURE :
+        phisto = renewal_data->mixture;
+        break;
+      default :
+        break;
+      }
+
+      // an empty frequency distribution is not attached to the extracted model
+      if ((phisto) && (phisto->nb_element == 0)) {
+        phisto = NULL;
+      }
+    }
+
+    if (pdist) {
+      dist = new DiscreteParametricModel(*pdist , phisto);
+    }
+    else if (pparam) {
+      dist = new DiscreteParametricModel(*pparam , phisto);
+    }
+  }
+
+  return dist;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a Renewal object on the basis of an inter-event distribution.
+ * + * \param[in] error reference on a StatError object, + * \param[in] inter_event reference on the inter-event distribution, + * \param[in] type renewal process type (ORDINARY/EQUILIBRIUM), + * \param[in] time observation period. + * + * \return Renewal object. + */ +/*--------------------------------------------------------------*/ + +Renewal* Renewal::build(StatError &error , const DiscreteParametric &inter_event , + process_type type , int time) + +{ + bool status = true; + Renewal *renew; + + + renew = NULL; + error.init(); + + if (inter_event.offset == 0) { + status = false; + error.update(STAT_error[STATR_MIN_VALUE]); + } + if (time < MAX(inter_event.offset , 2)) { + status = false; + error.update(SEQ_error[SEQR_SHORT_OBSERVATION_TIME]); + } + if (time > MAX_TIME) { + status = false; + error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]); + } + + if (status) { + DiscreteParametric dtime(UNIFORM , time , time , D_DEFAULT , D_DEFAULT); + renew = new Renewal(type , dtime , inter_event); + } + + return renew; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a Renewal object from a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] type renewal process type (ORDINARY/EQUILIBRIUM), + * \param[in] time observation period, + * \param[in] cumul_threshold threshold on the cumulative inter-event distribution function. + * + * \return Renewal object. 
+ * NULL is returned when the file cannot be opened, when the parsing + * of the inter-event distribution fails, when time is lower than + * MAX(inter-event offset, 2) or greater than MAX_TIME, or when a line + * read after the distribution is not empty once its '#' comment and + * its trailing blanks/tabs have been removed. + * + * \note The DiscreteParametric object returned by the parser is deleted + * before returning. + */ +/*--------------------------------------------------------------*/ + +Renewal* Renewal::ascii_read(StatError &error , const string path , + process_type type , int time , double cumul_threshold) + +{ + string buffer; + size_t position; + bool status; + int line; + DiscreteParametric *inter_event; + Renewal *renew; + ifstream in_file(path.c_str()); + + + renew = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + + inter_event = DiscreteParametric::parsing(error , in_file , line , + NEGATIVE_BINOMIAL , cumul_threshold , 1); + + if (!inter_event) { + status = false; + } + + else { + if (time < MAX(inter_event->offset , 2)) { + status = false; + error.update(SEQ_error[SEQR_SHORT_OBSERVATION_TIME]); + } + if (time > MAX_TIME) { + status = false; + error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]); + } + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + if (!(trim_right_copy_if(buffer , is_any_of(" \t")).empty())) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + + if (status) { + DiscreteParametric dtime(UNIFORM , time , time , D_DEFAULT , D_DEFAULT); + renew = new Renewal(type , dtime , *inter_event); + } + + delete inter_event; + } + + return renew; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/renewal2.cpp b/src/cpp/sequence_analysis/renewal2.cpp new file mode 100644 index 0000000..ffd613b --- /dev/null +++ b/src/cpp/sequence_analysis/renewal2.cpp @@ -0,0 +1,2643 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * 
$Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public License + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "renewal.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a Renewal object. + * + * \param[in,out] os stream. 
+ * + * The line gives the process type (ordinary or equilibrium), then the + * identifier, the mean and the variance of the inter-event distribution. + * No end of line is written. + * + * \return stream. + */ +/*--------------------------------------------------------------*/ + +ostream& Renewal::line_write(ostream &os) const + +{ + switch (type) { + case ORDINARY : + os << SEQ_label[SEQL_ORDINARY_RENEWAL] << " - "; + break; + case EQUILIBRIUM : + os << SEQ_label[SEQL_EQUILIBRIUM_RENEWAL] << " - "; + break; + } + + os << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_discrete_distribution_word[inter_event->ident] << " " + << STAT_label[STATL_MEAN] << ": " << inter_event->mean << " " + << STAT_label[STATL_VARIANCE] << ": " << inter_event->variance; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a renewal process and the associated data structure. + * + * \param[in,out] os stream, + * \param[in] timev pointer on a RenewalData object (may be NULL: the parts + * comparing the process with data are then skipped), + * \param[in] exhaustive flag detail level, + * \param[in] file_flag flag file: when true, the text lines are prefixed with "# ". + * + * \return stream. + */ +/*--------------------------------------------------------------*/ + +ostream& Renewal::ascii_write(ostream &os , const RenewalData *timev , + bool exhaustive , bool file_flag) const + +{ + int i , j; + int nb_dist , inf , sup; + double likelihood , information , *scale; + const Distribution **pdist; + Test test(CHI2); + + + if (file_flag) { + os << "# "; + } + switch (type) { + case ORDINARY : + os << SEQ_label[SEQL_ORDINARY_RENEWAL] << endl; + break; + case EQUILIBRIUM : + os << SEQ_label[SEQL_EQUILIBRIUM_RENEWAL] << endl; + break; + } + + // writing of the inter-event distribution + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + inter_event->ascii_print(os); + inter_event->ascii_parametric_characteristic_print(os , false , file_flag); + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIATION_COEFF] << ": " + << sqrt(inter_event->variance) / inter_event->mean << endl; + + if ((timev) && (timev->within) && (timev->backward) && 
(timev->forward)) { + if (timev->inter_event) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->inter_event->ascii_characteristic_print(os , false , file_flag); + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIATION_COEFF] << ": " + << sqrt(timev->inter_event->variance) / timev->inter_event->mean << endl; + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + inter_event->Distribution::ascii_print(os , file_flag , true , false , timev->inter_event); + } + } + + if (exhaustive) { + pdist = new const Distribution*[1]; + scale = new double[1]; + + pdist[0] = inter_event; + + // writing of the frequency distribution of time intervals between events within the observation period + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->within->ascii_characteristic_print(os , false , file_flag); + + if (timev->within->nb_element > 0) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << " | " << 
STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + inter_event->Distribution::ascii_print(os , file_flag , true , false , timev->within); + } + + // writing of the length-biased distribution + + if (timev->length_bias) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + length_bias->ascii_characteristic_print(os , false , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->length_bias->ascii_characteristic_print(os , false , file_flag); + + scale[0] = timev->length_bias->nb_element; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " | " << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_LENGTH_BIASED] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_LENGTH_BIASED] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + length_bias->Distribution::ascii_print(os , 1 , pdist , scale , file_flag , true , + timev->length_bias , true); + } + + // writing of the backward recurrence time distribution + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION] << endl; + backward->ascii_characteristic_print(os , false , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_BACKWARD] << " " << 
SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->backward->ascii_characteristic_print(os , false , file_flag); + + scale[0] = timev->backward->nb_element; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " | " << STAT_label[STATL_BACKWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_BACKWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " " << STAT_label[STATL_FUNCTION] << " | " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + backward->Distribution::ascii_print(os , 1 , pdist , scale , file_flag , true , + timev->backward , true); + + // writing of the forward recurrence time distribution + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION] << endl; + forward->ascii_characteristic_print(os , false , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->forward->ascii_characteristic_print(os , false , file_flag); + + scale[0] = timev->forward->nb_element; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " | " << STAT_label[STATL_FORWARD] + << " " << 
SEQ_label[SEQL_RECURRENCE_TIME]<< " " << STAT_label[STATL_DISTRIBUTION] + << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " " << STAT_label[STATL_FUNCTION] << " | " << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + forward->Distribution::ascii_print(os , 1 , pdist , scale , file_flag , true , + timev->forward , true); + + delete [] pdist; + delete [] scale; + } + } + + if ((exhaustive) && ((!timev) || (!(timev->length_bias)))) { + + // writing of the inter-event distribution, the length-biased distribution, + // the backward and forward recurrence time distributions + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + length_bias->ascii_characteristic_print(os , false , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION] << endl; + backward->ascii_characteristic_print(os , false , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION] << endl; + forward->ascii_characteristic_print(os , false , file_flag); + + pdist = new const Distribution*[3]; + scale = new double[3]; + + pdist[0] = length_bias; + scale[0] = 1.; + pdist[1] = backward; + scale[1] = 1.; + pdist[2] = forward; + scale[2] = 1.; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << 
SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << " | " << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << " | " << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + inter_event->Distribution::ascii_print(os , 3 , pdist , scale , file_flag , true , + NULL , true); + + delete [] pdist; + delete [] scale; + } + + if (exhaustive) { + + // writing of the distributions of the time to the nth event + + inf = (timev ? timev->mixture->offset : mixture->offset); + if (inf < 1) { + inf = 1; + } + + sup = (timev ? timev->mixture->nb_value : mixture->nb_value); + + if (inf < sup) { + pdist = new const Distribution*[sup]; + scale = new double[sup]; + nb_dist = 0; + + for (i = inf + 1;i < sup;i++) { + pdist[nb_dist] = nevent_time[i]; + scale[nb_dist++] = 1.; + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + for (i = inf;i < sup;i++) { + os << " | " << SEQ_label[SEQL_TIME_UP] << " " << i << " " << STAT_label[STATL_DISTRIBUTION]; + } + os << endl; + + nevent_time[inf]->Distribution::ascii_print(os , nb_dist , pdist , scale , file_flag , + false , NULL , true); + + delete [] pdist; + delete [] scale; + } + } + + // writing of the number of events distributions + + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) 
{ + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + nb_event[i]->ascii_characteristic_print(os , false , file_flag); + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << ": " << nb_event[i]->variance / nb_event[i]->mean << endl; + if (nb_event[i]->variance > 0.) { + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_SKEWNESS_COEFF] << ": " << nb_event[i]->skewness_computation() << " " + << STAT_label[STATL_KURTOSIS_COEFF] << ": " << nb_event[i]->kurtosis_computation() << endl; + } + + switch (type) { + + case ORDINARY : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << 1. / (nb_event[i]->mean + 1.) << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " + << nb_event[i]->mean / (nb_event[i]->mean + 1.) << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " + << nb_event[i]->mass[0] / (nb_event[i]->mean + 1.) << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " + << 2. * (1. - nb_event[i]->mass[0]) / (nb_event[i]->mean + 1.) << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " + << (nb_event[i]->mean - 1. + nb_event[i]->mass[0]) / (nb_event[i]->mean + 1.) << endl; + break; + } + } + + if (timev) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->hnb_event[i]->ascii_characteristic_print(os , false , file_flag); + if (timev->hnb_event[i]->mean > 0.) 
{ + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << ": " + << timev->hnb_event[i]->variance / timev->hnb_event[i]->mean << endl; + } + if (timev->hnb_event[i]->variance > 0.) { + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_SKEWNESS_COEFF] << ": " << timev->hnb_event[i]->skewness_computation() << " " + << STAT_label[STATL_KURTOSIS_COEFF] << ": " << timev->hnb_event[i]->kurtosis_computation() << endl; + } + + switch (type) { + + case ORDINARY : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << timev->hnb_event[i]->nb_element << " (" + << 1. / (timev->hnb_event[i]->mean + 1.) << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << timev->hnb_event[i]->mean * + timev->hnb_event[i]->nb_element << " (" + << timev->hnb_event[i]->mean / (timev->hnb_event[i]->mean + 1.) << ")" << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " << timev->hnb_event[i]->frequency[0] << " (" + << timev->hnb_event[i]->frequency[0] / (timev->hnb_event[i]->nb_element * (timev->hnb_event[i]->mean + 1.)) + << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << 2 * (timev->hnb_event[i]->nb_element - + timev->hnb_event[i]->frequency[0]) << " (" + << 2. * (timev->hnb_event[i]->nb_element - timev->hnb_event[i]->frequency[0]) / + (timev->hnb_event[i]->nb_element * (timev->hnb_event[i]->mean + 1.)) << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << (timev->hnb_event[i]->mean - 1.) * + timev->hnb_event[i]->nb_element + timev->hnb_event[i]->frequency[0] << " (" + << (timev->hnb_event[i]->mean - 1. + (double)timev->hnb_event[i]->frequency[0] / + (double)timev->hnb_event[i]->nb_element) / (timev->hnb_event[i]->mean + 1.) 
<< ")" << endl; + break; + } + } + + likelihood = nb_event[i]->likelihood_computation(*timev->hnb_event[i]); + information = timev->hnb_event[i]->information_computation(); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_LIKELIHOOD] << ": "<< likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << likelihood / timev->hnb_event[i]->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_MAX_LIKELIHOOD] << ": " << information << " (" + << STAT_label[STATL_INFORMATION] << ": " << information / timev->hnb_event[i]->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_DEVIANCE] << ": " << 2 * (information - likelihood) << endl; + + nb_event[i]->chi2_fit(*(timev->hnb_event[i]) , test); + os << "\n"; + test.ascii_print(os , file_flag); + } + + if ((exhaustive) && ((time->variance == 0.) || (timev))) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if (timev) { + os << " | " << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << " | " << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION]; + if (timev) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + nb_event[i]->Distribution::ascii_print(os , file_flag , true , false , + (timev ? timev->hnb_event[i] : NULL)); + } + } + } + + if (time->variance > 0.) 
{ + + // writing of the mixture of number of events distributions + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_EVENT_MIXTURE] << endl; + mixture->ascii_characteristic_print(os , false , file_flag); + + switch (type) { + + case ORDINARY : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << 1. / (mixture->mean + 1.) << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " + << mixture->mean / (mixture->mean + 1.) << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " + << mixture->mass[0] / (mixture->mean + 1.) << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " + << 2. * (1. - mixture->mass[0]) / (mixture->mean + 1.) << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " + << (mixture->mean - 1. + mixture->mass[0]) / (mixture->mean + 1.) << endl; + break; + } + } + + if (timev) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->mixture->ascii_characteristic_print(os , false , file_flag); + + switch (type) { + + case ORDINARY : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << timev->mixture->nb_element << " (" + << 1. / (timev->mixture->mean + 1.) << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << timev->mixture->mean * + timev->mixture->nb_element << " (" + << timev->mixture->mean / (timev->mixture->mean + 1.) 
<< ")" << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " << timev->mixture->frequency[0] << " (" + << timev->mixture->frequency[0] / (timev->mixture->nb_element * (timev->mixture->mean + 1.)) + << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << 2 * (timev->mixture->nb_element - + timev->mixture->frequency[0]) << " (" + << 2. * (timev->mixture->nb_element - timev->mixture->frequency[0]) / + (timev->mixture->nb_element * (timev->mixture->mean + 1.)) << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << (timev->mixture->mean - 1.) * + timev->mixture->nb_element + timev->mixture->frequency[0] << " (" + << (timev->mixture->mean - 1. + (double)timev->mixture->frequency[0] / + (double)timev->mixture->nb_element) / (timev->mixture->mean + 1.) << ")" << endl; + break; + } + } + + likelihood = likelihood_computation(*timev); + information = timev->information_computation(); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << likelihood / timev->nb_element << ")" << endl; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_MAX_LIKELIHOOD] << ": " << information << " (" + << STAT_label[STATL_INFORMATION] << ": " << information / timev->nb_element << ")" << endl; + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_DEVIANCE] << ": " << 2 * (information - likelihood) << endl; + + mixture->chi2_fit(*(timev->mixture) , test); + os << "\n"; + test.ascii_print(os , file_flag); + } + + pdist = new const Distribution*[time->nb_value]; + scale = new double[time->nb_value]; + nb_dist = 0; + + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) 
{ + pdist[nb_dist] = nb_event[i]; + + if (timev) { + scale[nb_dist++] = timev->nb_element * time->mass[i]; + } + else { + scale[nb_dist++] = time->mass[i]; + } + } + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if (timev) { + os << " | " << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << " | " << SEQ_label[SEQL_NB_EVENT_MIXTURE]; + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + os << " | " << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION]; + } + } + if (timev) { + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << " | " << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] << " " + << STAT_label[STATL_FUNCTION] << endl; + + mixture->ascii_print(os , nb_dist , pdist , scale , file_flag , true , + (timev ? 
timev->mixture : NULL)); + + delete [] pdist; + delete [] scale; + } + + // writing of the observation period frequency distribution + + if (timev) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + timev->htime->ascii_characteristic_print(os , false , file_flag); + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + timev->htime->ascii_print(os , file_flag); + } + } + } + + if (exhaustive) { + + // writing of no-event/event probabilities as a function of the index parameter + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + + if ((timev) && (timev->index_event)) { + os << " | " << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + } + os << " | " << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + if ((timev) && (timev->index_event)) { + os << " | " << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + } + os << " | " << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + if ((timev) && (timev->index_event)) { + os << " | " << STAT_label[STATL_FREQUENCY]; + } + os << endl; + + index_event->ascii_print(os , file_flag , (((timev) && (timev->index_event)) ? 
timev->index_event : NULL)); + + // writing of the sequences of events + + if ((timev) && (timev->sequence)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_SEQUENCES] << endl; + + for (i = 0;i < timev->nb_element;i++) { + os << "\n"; + if (file_flag) { + os << "# "; + } + + for (j = 0;j < timev->length[i];j++) { + if ((j > 0) && ((2 * j) % LINE_NB_CHARACTER == 0)) { + os << "\\" << endl; + if (file_flag) { + os << "# "; + } + } + + os << timev->sequence[i][j] << " "; + } + + os << endl; + } + } + } + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Renewal object. + * + * Delegates to ascii_write(os , renewal_data , exhaustive , false): + * the renewal_data member is passed as data structure and no "# " + * prefix is written. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + * + * \return stream. + */ +/*--------------------------------------------------------------*/ + +ostream& Renewal::ascii_write(ostream &os , bool exhaustive) const + +{ + return ascii_write(os , renewal_data , exhaustive , false); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Renewal object in a file. + * + * error is reset on entry. When the file is opened, the object is written + * with ascii_write(out_file , renewal_data , exhaustive , true), i.e. with + * the "# " prefixes of the file format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status (false, with STATR_FILE_NAME reported in error, + * when the file cannot be opened; true otherwise). + */ +/*--------------------------------------------------------------*/ + +bool Renewal::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + ascii_write(out_file , renewal_data , exhaustive , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a renewal process and the associated data structure at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] timev pointer on a RenewalData object. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Renewal::spreadsheet_write(ostream &os , const RenewalData *timev) const + +/* { + os << inter_event->cumul[1] << "\t" << inter_event->cumul[4] << "\t" << inter_event->cumul[12] << "\t" + os << inter_event->cumul[6] << "\t" << inter_event->cumul[9] << "\t" << inter_event->cumul[12] << "\t" + << inter_event->mean << "\t" << sqrt(inter_event->variance) << "\t" + << inter_event->second_difference_norm_computation() << "\t" + << timev->hnb_event[time->offset]->mean << "\t" << nb_event[time->offset]->mean << "\t" + << timev->hnb_event[time->offset]->variance << "\t" << nb_event[time->offset]->variance << "\t" + << 2 * (timev->information_computation()- likelihood_computation(*timev)) << endl; + + return os; +} */ + +{ + int i; + int nb_dist , inf , sup; + double likelihood , information , *scale; + const Distribution **pdist; + Test test(CHI2); + + + switch (type) { + case ORDINARY : + os << SEQ_label[SEQL_ORDINARY_RENEWAL] << endl; + break; + case EQUILIBRIUM : + os << SEQ_label[SEQL_EQUILIBRIUM_RENEWAL] << endl; + break; + } + + // writing of the inter-event distribution + + os << "\n" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + inter_event->spreadsheet_print(os); + inter_event->spreadsheet_parametric_characteristic_print(os); + os << STAT_label[STATL_VARIATION_COEFF] << "\t" + << sqrt(inter_event->variance) / inter_event->mean << endl; + + if ((timev) && (timev->within) && (timev->backward) && (timev->forward)) { + if (timev->inter_event) { + os << "\n" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->inter_event->spreadsheet_characteristic_print(os); + os << STAT_label[STATL_VARIATION_COEFF] << "\t" + << sqrt(timev->inter_event->variance) / timev->inter_event->mean << endl; + + os << "\n\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\t" << 
SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + inter_event->Distribution::spreadsheet_print(os , true , false , false , timev->inter_event); + } + + pdist = new const Distribution*[1]; + scale = new double[1]; + + pdist[0] = inter_event; + + // writing of the frequency distribution of time intervals between events within the observation period + + os << "\n" << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->within->spreadsheet_characteristic_print(os); + + if (timev->within->nb_element > 0) { + os << "\n\t" << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + inter_event->Distribution::spreadsheet_print(os , true , false , false , timev->within); + } + + // writing of the length-biased distribution + + if (timev->length_bias) { + os << "\n" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + length_bias->spreadsheet_characteristic_print(os); + os << "\n" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->length_bias->spreadsheet_characteristic_print(os); + + scale[0] = 
timev->length_bias->nb_element; + + os << "\n\t" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\t" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_LENGTH_BIASED] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_LENGTH_BIASED] << " " + << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + length_bias->Distribution::spreadsheet_print(os , 1 , pdist , scale , true , + timev->length_bias , true); + } + + // writing of the backward recurrence time distribution + + os << "\n" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION] << endl; + backward->spreadsheet_characteristic_print(os); + os << "\n" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->backward->spreadsheet_characteristic_print(os); + + scale[0] = timev->backward->nb_element; + + os << "\n\t" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t" << STAT_label[STATL_BACKWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_BACKWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " " << STAT_label[STATL_FUNCTION] << "\t" << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << 
STAT_label[STATL_FUNCTION] << endl; + + backward->Distribution::spreadsheet_print(os , 1 , pdist , scale , true , + timev->backward , true); + + // writing of the forward recurrence time distribution + + os << "\n" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << endl; + forward->spreadsheet_characteristic_print(os); + os << "\n" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->forward->spreadsheet_characteristic_print(os); + + scale[0] = timev->forward->nb_element; + + os << "\n\t" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t" << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << " " << STAT_label[STATL_FUNCTION] << "\t" << STAT_label[STATL_CUMULATIVE] + << " " << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + forward->Distribution::spreadsheet_print(os , 1 , pdist , scale , true , + timev->forward , true); + + delete [] pdist; + delete [] scale; + } + + if ((!timev) || (!(timev->length_bias))) { + + // writing of the inter-event distribution, the length-biased distribution + // the backward and forward recurrence time distributions + + os << "\n" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + length_bias->spreadsheet_characteristic_print(os); + os << "\n" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << 
STAT_label[STATL_DISTRIBUTION] << endl; + backward->spreadsheet_characteristic_print(os); + os << "\n" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION] << endl; + forward->spreadsheet_characteristic_print(os); + + pdist = new const Distribution*[3]; + scale = new double[3]; + + pdist[0] = length_bias; + scale[0] = 1.; + pdist[1] = backward; + scale[1] = 1.; + pdist[2] = forward; + scale[2] = 1.; + + os << "\n\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << "\t" << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_DISTRIBUTION] + << "\t" << STAT_label[STATL_CUMULATIVE] << " " << SEQ_label[SEQL_INTER_EVENT] + << " " << STAT_label[STATL_DISTRIBUTION] << " " << STAT_label[STATL_FUNCTION] << endl; + + inter_event->Distribution::spreadsheet_print(os , 3 , pdist , scale , true , NULL , true); + + delete [] pdist; + delete [] scale; + } + + // writing of the distributions of the time to the nth event + + inf = (timev ? timev->mixture->offset : mixture->offset); + if (inf < 1) { + inf = 1; + } + + sup = (timev ? 
timev->mixture->nb_value : mixture->nb_value); + + if (inf < sup) { + pdist = new const Distribution*[sup]; + scale = new double[sup]; + nb_dist = 0; + + for (i = inf + 1;i < sup;i++) { + pdist[nb_dist] = nevent_time[i]; + scale[nb_dist++] = 1.; + } + + os << "\n"; + for (i = inf;i < sup;i++) { + os << "\t" << SEQ_label[SEQL_TIME_UP] << " " << i << " " << STAT_label[STATL_DISTRIBUTION]; + } + os << endl; + + nevent_time[inf]->Distribution::spreadsheet_print(os , nb_dist , pdist , scale , + false , NULL , true); + + delete [] pdist; + delete [] scale; + } + + // writing of the number of events distributions + + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + os << "\n" << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION] << endl; + nb_event[i]->spreadsheet_characteristic_print(os); + os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << "\t" << nb_event[i]->variance / nb_event[i]->mean << endl; + if (nb_event[i]->variance > 0.) { + os << STAT_label[STATL_SKEWNESS_COEFF] << "\t" << nb_event[i]->skewness_computation() << "\t" + << STAT_label[STATL_KURTOSIS_COEFF] << "\t" << nb_event[i]->kurtosis_computation() << endl; + } + + switch (type) { + + case ORDINARY : { + os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << 1. / (nb_event[i]->mean + 1.) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" + << nb_event[i]->mean / (nb_event[i]->mean + 1.) << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << "\t" + << nb_event[i]->mass[0] / (nb_event[i]->mean + 1.) << endl; + + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" + << 2. * (1. - nb_event[i]->mass[0]) / (nb_event[i]->mean + 1.) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" + << (nb_event[i]->mean - 1. + nb_event[i]->mass[0]) / (nb_event[i]->mean + 1.) 
<< endl; + break; + } + } + + if (timev) { + os << "\n" << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->hnb_event[i]->spreadsheet_characteristic_print(os); + if (timev->hnb_event[i]->mean > 0.) { + os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << "\t" + << timev->hnb_event[i]->variance / timev->hnb_event[i]->mean << endl; + } + if (timev->hnb_event[i]->variance > 0.) { + os << STAT_label[STATL_SKEWNESS_COEFF] << "\t" << timev->hnb_event[i]->skewness_computation() << "\t" + << STAT_label[STATL_KURTOSIS_COEFF] << "\t" << timev->hnb_event[i]->kurtosis_computation() << endl; + } + + switch (type) { + + case ORDINARY : { + os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << timev->hnb_event[i]->nb_element << "\t" + << 1. / (timev->hnb_event[i]->mean + 1.) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << timev->hnb_event[i]->mean * + timev->hnb_event[i]->nb_element << "\t" + << timev->hnb_event[i]->mean / (timev->hnb_event[i]->mean + 1.) << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << "\t" << timev->hnb_event[i]->frequency[0] << "\t" + << timev->hnb_event[i]->frequency[0] / (timev->hnb_event[i]->nb_element * (timev->hnb_event[i]->mean + 1.)) << endl; + + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << 2 * (timev->hnb_event[i]->nb_element - + timev->hnb_event[i]->frequency[0]) << "\t" + << 2. * (timev->hnb_event[i]->nb_element - timev->hnb_event[i]->frequency[0]) / + (timev->hnb_event[i]->nb_element * (timev->hnb_event[i]->mean + 1.)) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << (timev->hnb_event[i]->mean - 1.) * + timev->hnb_event[i]->nb_element + timev->hnb_event[i]->frequency[0] << "\t" + << (timev->hnb_event[i]->mean - 1. 
+ (double)timev->hnb_event[i]->frequency[0] / + (double)timev->hnb_event[i]->nb_element) / (timev->hnb_event[i]->mean + 1.) << endl; + break; + } + } + + likelihood = nb_event[i]->likelihood_computation(*timev->hnb_event[i]); + information = timev->hnb_event[i]->information_computation(); + + os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << likelihood / timev->hnb_event[i]->nb_element << endl; + os << STAT_label[STATL_MAX_LIKELIHOOD] << "\t" << information << "\t" + << STAT_label[STATL_INFORMATION] << "\t" << information / timev->hnb_event[i]->nb_element << endl; + os << STAT_label[STATL_DEVIANCE] << "\t" << 2 * (information - likelihood) << endl; + + nb_event[i]->chi2_fit(*(timev->hnb_event[i]) , test); + os << "\n"; + test.spreadsheet_print(os); + } + + if ((time->variance == 0.) || (timev)) { + os << "\n"; + if (timev) { + os << "\t" << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << "\t" << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION]; + if (timev) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION] << endl; + + nb_event[i]->Distribution::spreadsheet_print(os , true , false , false , + (timev ? timev->hnb_event[i] : NULL)); + } + } + } + + if (time->variance > 0.) { + + // writing of the mixture of number of events distributions + + os << "\n" << SEQ_label[SEQL_NB_EVENT_MIXTURE] << endl; + mixture->spreadsheet_characteristic_print(os); + + switch (type) { + + case ORDINARY : { + os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << 1. 
/ (mixture->mean + 1.) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" + << mixture->mean / (mixture->mean + 1.) << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << "\t" + << mixture->mass[0] / (mixture->mean + 1.) << endl; + + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" + << 2. * (1. - mixture->mass[0]) / (mixture->mean + 1.) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" + << (mixture->mean - 1. + mixture->mass[0]) / (mixture->mean + 1.) << endl; + break; + } + } + + if (timev) { + os << "\n" << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->mixture->spreadsheet_characteristic_print(os); + + switch (type) { + + case ORDINARY : { + os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << timev->mixture->nb_element << "\t" + << 1. / (timev->mixture->mean + 1.) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << timev->mixture->mean * + timev->mixture->nb_element << "\t" + << timev->mixture->mean / (timev->mixture->mean + 1.) << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << "\t" << timev->mixture->frequency[0] << "\t" + << timev->mixture->frequency[0] / (timev->mixture->nb_element * (timev->mixture->mean + 1.)) << endl; + + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << 2 * (timev->mixture->nb_element - + timev->mixture->frequency[0]) << "\t" + << 2. * (timev->mixture->nb_element - timev->mixture->frequency[0]) / + (timev->mixture->nb_element * (timev->mixture->mean + 1.)) << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << (timev->mixture->mean - 1.) * + timev->mixture->nb_element + timev->mixture->frequency[0] << "\t" + << (timev->mixture->mean - 1. + (double)timev->mixture->frequency[0] / + (double)timev->mixture->nb_element) / (timev->mixture->mean + 1.) 
<< endl; + break; + } + } + + likelihood = likelihood_computation(*timev); + information = timev->information_computation(); + + os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << likelihood / timev->nb_element << endl; + os << STAT_label[STATL_MAX_LIKELIHOOD] << "\t" << information << "\t" + << STAT_label[STATL_INFORMATION] << "\t" << information / timev->nb_element << endl; + os << STAT_label[STATL_DEVIANCE] << "\t" << 2 * (information - likelihood) << endl; + + mixture->chi2_fit(*(timev->mixture) , test); + os << "\n"; + test.spreadsheet_print(os); + } + + pdist = new const Distribution*[time->nb_value]; + scale = new double[time->nb_value]; + nb_dist = 0; + + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + pdist[nb_dist] = nb_event[i]; + + if (timev) { + scale[nb_dist++] = timev->nb_element * time->mass[i]; + } + else { + scale[nb_dist++] = time->mass[i]; + } + } + } + + os << "\n"; + if (timev) { + os << "\t" << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << "\t" << SEQ_label[SEQL_NB_EVENT_MIXTURE]; + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + os << "\t" << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << i + << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION]; + } + } + if (timev) { + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + } + os << "\t" << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_MIXTURE] << " " + << STAT_label[STATL_FUNCTION] << endl; + + mixture->spreadsheet_print(os , nb_dist , pdist , scale , true , + (timev ? 
timev->mixture : NULL)); + + delete [] pdist; + delete [] scale; + + // writing of the observation period frequency distribution + + if (timev) { + os << "\n" << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + timev->htime->spreadsheet_characteristic_print(os); + + os << "\n\t" << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + timev->htime->spreadsheet_print(os); + } + } + + // writing of no-event/event probabilities as a function of time + + os << "\n"; + if ((timev) && (timev->index_event)) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + } + os << "\t" << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + if ((timev) && (timev->index_event)) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + } + os << "\t" << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + if ((timev) && (timev->index_event)) { + os << "\t" << STAT_label[STATL_FREQUENCY]; + } + os << endl; + + index_event->spreadsheet_print(os , (((timev) && (timev->index_event)) ? timev->index_event : NULL)); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Renewal object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Renewal::spreadsheet_write(StatError &error , const string path) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + spreadsheet_write(out_file , renewal_data); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a renewal process using Gnuplot. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] timev pointer on a RenewalData object. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Renewal::plot_write(const char *prefix , const char *title , + const RenewalData *timev) const + +{ + bool status; + int i , j , k , m , n; + int nb_file , nb_dist , nb_histo , nb_time , inf , sup; + double max , *scale; + const FrequencyDistribution **phisto; + const Distribution **pdist; + ostringstream *data_file_name; + + + // writing of the data files + + nb_file = 2; + if ((!timev) || (!(timev->length_bias))) { + nb_file++; + } + + nb_dist = PLOT_NEVENT_TIME; + nb_histo = 0; + if ((timev) && (timev->within) && (timev->backward) && (timev->forward)) { + if (timev->inter_event) { + nb_dist++; + nb_histo++; + } + if (timev->within->nb_element > 0) { + nb_dist++; + nb_histo++; + } + if (timev->length_bias) { + nb_dist += 2; + nb_histo++; + } + + nb_dist += 4; + nb_histo += 2; + } + + if ((!timev) || (!(timev->length_bias))) { + nb_dist += 3; + } + + nb_time = 0; + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + nb_time++; + } + } + + if (((time->variance == 0.) || (timev)) && (nb_time <= PLOT_NB_TIME)) { + nb_dist += nb_time; + if (timev) { + nb_histo += nb_time; + } + } + + if (time->variance > 0.) 
{ + nb_dist += 1; + if (timev) { + nb_histo += 2; + } + + if (nb_time <= PLOT_NB_TIME) { + nb_file += nb_time; + } + } + + data_file_name = new ostringstream[nb_file]; + + data_file_name[0] << prefix << 0 << ".dat"; + + pdist = new const Distribution*[nb_dist]; + scale = new double[nb_dist]; + if (timev) { + phisto = new const FrequencyDistribution*[nb_histo]; + } + + nb_histo = 0; + nb_dist = 0; + + if ((timev) && (timev->within) && (timev->backward) && (timev->forward)) { + if (timev->inter_event) { + phisto[nb_histo++] = timev->inter_event; + pdist[nb_dist] = inter_event; + scale[nb_dist++] = timev->inter_event->nb_element; + } + + if (timev->within->nb_element > 0) { + phisto[nb_histo++] = timev->within; + pdist[nb_dist] = inter_event; + scale[nb_dist++] = timev->within->nb_element; + } + + if (timev->length_bias) { + phisto[nb_histo++] = timev->length_bias; + pdist[nb_dist] = length_bias; + scale[nb_dist++] = timev->length_bias->nb_element; + pdist[nb_dist] = inter_event; + scale[nb_dist++] = timev->length_bias->nb_element; + } + + phisto[nb_histo++] = timev->backward; + pdist[nb_dist] = backward; + scale[nb_dist++] = timev->backward->nb_element; + pdist[nb_dist] = inter_event; + scale[nb_dist++] = timev->backward->nb_element; + + phisto[nb_histo++] = timev->forward; + pdist[nb_dist] = forward; + scale[nb_dist++] = timev->forward->nb_element; + pdist[nb_dist] = inter_event; + scale[nb_dist++] = timev->forward->nb_element; + } + + if ((!timev) || (!(timev->length_bias))) { + pdist[nb_dist] = inter_event; + scale[nb_dist++] = 1.; + pdist[nb_dist] = length_bias; + scale[nb_dist++] = 1.; + pdist[nb_dist] = forward; + scale[nb_dist++] = 1.; + } + + inf = (timev ? timev->mixture->offset : mixture->offset); + if (inf < 1) { + inf = 1; + } + + sup = (timev ? 
timev->mixture->nb_value : mixture->nb_value); + if (sup - inf > PLOT_NEVENT_TIME) { + sup = inf + PLOT_NEVENT_TIME; + } + + for (i = inf;i < sup;i++) { + pdist[nb_dist] = nevent_time[i]; + scale[nb_dist++] = 1.; + } + + if ((time->variance > 0.) && (timev)) { + phisto[nb_histo++] = timev->htime; + } + + if (((time->variance == 0.) || (timev)) && (nb_time <= PLOT_NB_TIME)) { + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + pdist[nb_dist] = nb_event[i]; + + if (timev) { + phisto[nb_histo++] = timev->hnb_event[i]; + scale[nb_dist++] = timev->hnb_event[i]->nb_element; + } + else { + scale[nb_dist++] = 1.; + } + } + } + } + + if (time->variance > 0.) { + pdist[nb_dist] = mixture; + + if (timev) { + phisto[nb_histo++] = timev->mixture; + scale[nb_dist++] = timev->mixture->nb_element; + } + else { + scale[nb_dist++] = 1.; + } + } + + status = plot_print((data_file_name[0].str()).c_str() , nb_dist , pdist , + scale , NULL , nb_histo , phisto); + + if (status) { + i = 1; + if ((!timev) || (!(timev->length_bias))) { + data_file_name[i] << prefix << i << ".dat"; + backward->plot_print((data_file_name[i++].str()).c_str()); + } + + if ((time->variance > 0.) && (nb_time <= PLOT_NB_TIME)) { + for (j = time->offset;j < time->nb_value;j++) { + if (time->mass[j] > 0.) { + data_file_name[i] << prefix << i << ".dat"; + + pdist[0] = nb_event[j]; + + if (timev) { + scale[0] = time->mass[j] * timev->nb_element; + } + else { + scale[0] = time->mass[j]; + } + + plot_print((data_file_name[i++].str()).c_str() , 1 , pdist , + scale , NULL , 0 , NULL); + } + } + } + + data_file_name[nb_file - 1] << prefix << nb_file - 1 << ".dat"; + index_event->plot_print((data_file_name[nb_file - 1].str()).c_str() , index_event->length , + (((timev) && (timev->index_event)) ? 
timev->index_event : NULL)); + + // writing of the script files + + for (i = 0;i < 2;i++) { + j = 1; + k = nb_histo + 1; + + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + if ((timev) && (timev->within) && (timev->backward) && (timev->forward)) { + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if (timev->inter_event) { + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << (int)(MAX(timev->inter_event->max , inter_event->max * timev->inter_event->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (timev->within->nb_element > 0) { + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << (int)(MAX(timev->within->max , inter_event->max * timev->within->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << 
STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (timev->length_bias) { + max = MAX(length_bias->max , inter_event->max); + + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << (int)(MAX(timev->length_bias->max , max * timev->length_bias->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + max = MAX(backward->max , inter_event->max); + + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << (int)(MAX(timev->backward->max , max * timev->backward->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] 
<< "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \""<< STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + max = MAX(forward->max , inter_event->max); + + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << (int)(MAX(timev->forward->max , max * timev->forward->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(out_file); + out_file << "\" with linespoints" << endl; + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (!(timev->length_bias)) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + } + + if ((!timev) || (!(timev->length_bias))) { + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { 
+ out_file << "set xtics 0,1" << endl; + } + + max = inter_event->max; + if (length_bias->max > max) { + max = length_bias->max; + } + if (backward->max > max) { + max = backward->max; + } + + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << MIN(max * YSCALE , 1.) << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << k++ << " title \"" + << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(out_file); + out_file << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ << " title \"" + << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION] + << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" title \"" + << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ << " title \"" + << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints" << endl; + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if (inf < sup) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (nevent_time[sup - 1]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << nevent_time[sup - 1]->nb_value - 1 << "] [0:" + << MIN(nevent_time[inf]->max * YSCALE , 1.) 
<< "] "; + for (m = inf;m < sup;m++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_TIME_UP] << " " << m << " " + << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints"; + if (m < sup - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (nevent_time[sup - 1]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if ((time->variance > 0.) && (timev)) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (timev->htime->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(timev->htime->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << timev->htime->nb_value - 1 << "] [0:" + << (int)(timev->htime->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ << " title \"" + << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (timev->htime->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(timev->htime->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if (((time->variance == 0.) || (timev)) && (nb_time <= PLOT_NB_TIME)) { + for (m = time->offset;m < time->nb_value;m++) { + if (time->mass[m] > 0.) 
{ + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (nb_event[m]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if (timev) { + out_file << "plot [0:" << nb_event[m]->nb_value - 1 << "] [0:" + << (int)(MAX(timev->hnb_event[m]->max , nb_event[m]->max * timev->hnb_event[m]->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ << " title \"" + << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << m << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses,\\" << endl; + } + + else { + out_file << "plot [0:" << nb_event[m]->nb_value - 1 << "] [0:" + << MIN(nb_event[m]->max * YSCALE , 1.) << "] "; + } + + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ << " title \"" + << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << m << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints" << endl; + + if (nb_event[m]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + } + } + + if (time->variance > 0.) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (mixture->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + if (timev) { + out_file << "plot [0:" << mixture->nb_value - 1 << "] [0:" + << (int)(MAX(timev->mixture->max , mixture->max * timev->mixture->nb_element) * YSCALE) + 1 + << "] \"" << label((data_file_name[0].str()).c_str()) << "\" using " << j++ << " title \"" + << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses,\\" << endl; + } + + else { + out_file << "plot [0:" << mixture->nb_value - 1 << "] [0:" + << MIN(mixture->max * YSCALE , 1.) 
<< "] "; + } + + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " << k++ + << " title \"" << SEQ_label[SEQL_NB_EVENT_MIXTURE] << "\" with linespoints"; + + if (nb_time <= PLOT_NB_TIME) { + m = (((!timev) || (!(timev->length_bias))) ? 2 : 1); + for (n = time->offset;n < time->nb_value;n++) { + if (time->mass[n] > 0.) { + out_file << ",\\" << endl; + out_file << "\"" << label((data_file_name[m++].str()).c_str()) << "\" title \"" + << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << n << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION] << "\" with linespoints"; + } + } + } + out_file << endl; + + if (mixture->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (index_event->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [" << index_event->offset << ":" << index_event->length - 1 << "] [0:1] "; + if ((timev) && (timev->index_event)) { + out_file << "\"" << label((data_file_name[nb_file - 1].str()).c_str()) << "\" using " + << 3 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << SEQ_label[SEQL_NO_EVENT_PROBABILITY] << " \" with linespoints,\\" << endl; + } + out_file << "\"" << label((data_file_name[nb_file - 1].str()).c_str()) << "\" using " + << 1 << " title \"" << SEQ_label[SEQL_THEORETICAL] << " " + << SEQ_label[SEQL_NO_EVENT_PROBABILITY] << " \" with linespoints,\\" << endl; + if ((timev) && (timev->index_event)) { + out_file << "\"" << label((data_file_name[nb_file - 1].str()).c_str()) << "\" using " + << 4 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << SEQ_label[SEQL_EVENT_PROBABILITY] << " \" with linespoints,\\" << endl; + } + out_file << "\"" << label((data_file_name[nb_file - 1].str()).c_str()) << "\" using " + << 2 << " title \"" << SEQ_label[SEQL_THEORETICAL] << " " + 
<< SEQ_label[SEQL_EVENT_PROBABILITY] << " \" with linespoints" << endl; + + if (index_event->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + delete [] pdist; + delete [] scale; + if (timev) { + delete [] phisto; + } + + delete [] data_file_name; + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a Renewal object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Renewal::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status = plot_write(prefix , title , renewal_data); + + error.init(); + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a renewal process. + * + * \param[in] timev pointer on a RenewalData object. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* Renewal::get_plotable(const RenewalData *timev) const + +{ + int i , j , k; + int nb_plot_set , nb_time , inf , sup , scale; + double max; + ostringstream legend , title; + MultiPlotSet *plot_set; + + + nb_plot_set = 1; + if ((timev) && (timev->within) && (timev->backward) && (timev->forward)) { + nb_plot_set += 2; + if (timev->inter_event) { + nb_plot_set++; + } + if (timev->within->nb_element > 0) { + nb_plot_set++; + } + if (timev->length_bias) { + nb_plot_set++; + } + } + + if ((!timev) || (!(timev->length_bias))) { + nb_plot_set++; + } + + inf = (timev ? 
timev->mixture->offset : mixture->offset); + if (inf < 1) { + inf = 1; + } + + sup = (timev ? timev->mixture->nb_value : mixture->nb_value); + if (sup - inf > PLOT_NEVENT_TIME) { + sup = inf + PLOT_NEVENT_TIME; + } + + if (inf < sup) { + nb_plot_set++; + } + + if ((time->variance > 0.) && (timev)) { + nb_plot_set++; + } + + nb_time = 0; + for (i = time->offset;i < time->nb_value;i++) { + if (time->mass[i] > 0.) { + nb_time++; + } + } + + if (nb_time <= PLOT_NB_TIME) { + if (!timev) { + nb_plot_set++; + } + else { + nb_plot_set += 2 * nb_time; + } + } + + if (time->variance > 0.) { + nb_plot_set++; + } + + plot_set = new MultiPlotSet(nb_plot_set); + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + i = 0; + if ((timev) && (timev->within) && (timev->backward) && (timev->forward)) { + if (timev->inter_event) { + + // fit of the inter-event distribution + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + plot[i].yrange = Range(0 , ceil(MAX(timev->inter_event->max , + inter_event->max * timev->inter_event->nb_element) * YSCALE)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->inter_event->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(legend); + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + inter_event->plotable_mass_write(plot[i][1] , timev->inter_event->nb_element); + i++; + } + + if (timev->within->nb_element > 0) { + + // frequency distribution of time intervals between events within the observation period + // fitted by the inter-event distribution + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + plot[i].yrange = Range(0 , 
ceil(MAX(timev->within->max , + inter_event->max * timev->within->nb_element) * YSCALE)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(2); + + legend.str(""); + legend << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->within->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(legend); + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + inter_event->plotable_mass_write(plot[i][1] , timev->within->nb_element); + i++; + } + + if (timev->length_bias) { + + // fit of the length-biased distribution + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + + max = MAX(length_bias->max , inter_event->max); + plot[i].yrange = Range(0 , ceil(MAX(timev->length_bias->max , + max * timev->length_bias->nb_element) * YSCALE)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(3); + + legend.str(""); + legend << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->length_bias->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + length_bias->plotable_mass_write(plot[i][1] , timev->length_bias->nb_element); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(legend); + plot[i][2].legend = legend.str(); + + plot[i][2].style = "linespoints"; + + inter_event->plotable_mass_write(plot[i][2] , timev->length_bias->nb_element); + i++; + } + + // fit of the backward 
recurrence time distribution + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + + max = MAX(backward->max , inter_event->max); + plot[i].yrange = Range(0 , ceil(MAX(timev->backward->max , + max * timev->backward->nb_element) * YSCALE)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(3); + + legend.str(""); + legend << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->backward->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + backward->plotable_mass_write(plot[i][1] , timev->backward->nb_element); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(legend); + plot[i][2].legend = legend.str(); + + plot[i][2].style = "linespoints"; + + inter_event->plotable_mass_write(plot[i][2] , timev->backward->nb_element); + i++; + + // fit of the forward recurrence time distribution + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + + max = MAX(forward->max , inter_event->max); + plot[i].yrange = Range(0 , ceil(MAX(timev->forward->max , + max * timev->forward->nb_element) * YSCALE)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(3); + + legend.str(""); + legend << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->forward->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << 
STAT_label[STATL_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + forward->plotable_mass_write(plot[i][1] , timev->forward->nb_element); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(legend); + plot[i][2].legend = legend.str(); + + plot[i][2].style = "linespoints"; + + inter_event->plotable_mass_write(plot[i][2] , timev->forward->nb_element); + i++; + } + + if ((!timev) || (!(timev->length_bias))) { + + // inter-event distribution, length-biased distribution, + // backward and forward recurrence time distributions + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + + max = inter_event->max; + if (length_bias->max > max) { + max = length_bias->max; + } + if (backward->max > max) { + max = backward->max; + } + plot[i].yrange = Range(0. , MIN(max * YSCALE , 1.)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(4); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_DISTRIBUTION]; + inter_event->plot_title_print(legend); + plot[i][0].legend = legend.str(); + + plot[i][0].style = "linespoints"; + + inter_event->plotable_mass_write(plot[i][0]); + + legend.str(""); + legend << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + length_bias->plotable_mass_write(plot[i][1]); + + legend.str(""); + legend << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION]; + plot[i][2].legend = legend.str(); + + plot[i][2].style = "linespoints"; + + backward->plotable_mass_write(plot[i][2]); + + legend.str(""); + legend << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_DISTRIBUTION]; + plot[i][3].legend = legend.str(); + + plot[i][3].style = "linespoints"; + + 
forward->plotable_mass_write(plot[i][3]); + i++; + } + + if (inf < sup) { + + // distributions of the time to the nth event + + plot[i].xrange = Range(0 , nevent_time[sup - 1]->nb_value - 1); + plot[i].yrange = Range(0. , MIN(nevent_time[inf]->max * YSCALE , 1.)); + + if (nevent_time[sup - 1]->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(sup - inf); + + for (j = inf;j < sup;j++) { + legend.str(""); + legend << SEQ_label[SEQL_TIME_UP] << " " << j << " " << STAT_label[STATL_DISTRIBUTION]; + plot[i][j - inf].legend = legend.str(); + + plot[i][j - inf].style = "linespoints"; + + nevent_time[j]->plotable_mass_write(plot[i][j - inf]); + } + + i++; + } + + if ((time->variance > 0.) && (timev)) { + + // observation period frequency distribution + + plot[i].xrange = Range(0 , timev->htime->nb_value - 1); + plot[i].yrange = Range(0 , ceil(timev->htime->max * YSCALE)); + + if (timev->htime->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(timev->htime->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->htime->plotable_frequency_write(plot[i][0]); + i++; + } + + if (nb_time <= PLOT_NB_TIME) { + if (!timev) { + + // number of events distributions + + plot[i].xrange = Range(0 , nb_event[time->nb_value - 1]->nb_value - 1); + plot[i].yrange = Range(0. , MIN(nb_event[time->offset]->max * YSCALE , 1.)); + + if (nb_event[time->nb_value - 1]->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(nb_time); + + j = 0; + for (k = time->offset;k < time->nb_value;k++) { + if (time->mass[k] > 0.) 
{ + legend.str(""); + legend << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << k << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linepoints"; + + nb_event[k]->plotable_mass_write(plot[i][j]); + + j++; + } + } + + i++; + } + + else { + for (j = time->offset;j < time->nb_value;j++) { + if (time->mass[j] > 0.) { + + // fit of the number of events distribution + + plot[i].xrange = Range(0 , nb_event[j]->nb_value - 1); + plot[i].yrange = Range(0 , ceil(MAX(timev->hnb_event[j]->max , + nb_event[j]->max * timev->hnb_event[j]->nb_element) * YSCALE)); + + if (nb_event[j]->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << j << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->hnb_event[j]->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << j << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + nb_event[j]->plotable_mass_write(plot[i][1] , timev->hnb_event[j]->nb_element); + i++; + + // number of events cumulative distribution function + + title.str(""); + title << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " + << j << " " << SEQ_label[SEQL_TIME_UNIT]; + plot[i].title = title.str(); + + plot[i].xrange = Range(0 , nb_event[j]->nb_value - 1); + plot[i].yrange = Range(0 , 1.); + + if (nb_event[j]->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(2); + + legend.str(""); + legend << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << 
STAT_label[STATL_FUNCTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "linespoints"; + + timev->hnb_event[j]->plotable_cumul_write(plot[i][0]); + + legend.str(""); + legend << STAT_label[STATL_CUMULATIVE] << " " << STAT_label[STATL_DISTRIBUTION] << " " + << STAT_label[STATL_FUNCTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + nb_event[j]->plotable_cumul_write(plot[i][1]); + i++; + } + } + } + } + + if (time->variance > 0.) { + if (timev) { + plot[i].yrange = Range(0 , ceil(MAX(timev->mixture->max , + mixture->max * timev->mixture->nb_element) * YSCALE)); + plot[i].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + timev->mixture->plotable_frequency_write(plot[i][0]); + + scale = timev->mixture->nb_element; + j = 1; + } + + else { + plot[i].yrange = Range(0. , MIN(mixture->max * YSCALE , 1.)); + plot[i].resize(1); + + scale = 1.; + j = 0; + } + + plot[i].xrange = Range(0 , mixture->nb_value - 1); + + if (mixture->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i][j].legend = SEQ_label[SEQL_NB_EVENT_MIXTURE]; + + plot[i][j].style = "linespoints"; + + mixture->plotable_mass_write(plot[i][j] , scale); + i++; + } + + // fit of no-event/event probabilities as a function of time + + plot[i].xrange = Range(index_event->offset , index_event->length - 1); + plot[i].yrange = Range(0. 
, 1.); + + if (index_event->length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + if ((timev) && (timev->index_event)) { + plot[i].resize(4); + } + else { + plot[i].resize(2); + } + + j = 0; + if ((timev) && (timev->index_event)) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + + timev->index_event->plotable_write(0 , plot[i][j]); + j++; + } + + legend.str(""); + legend << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + + index_event->plotable_write(0 , plot[i][j]); + j++; + + if ((timev) && (timev->index_event)) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + + timev->index_event->plotable_write(1 , plot[i][j]); + j++; + } + + legend.str(""); + legend << SEQ_label[SEQL_THEORETICAL] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "linespoints"; + + index_event->plotable_write(1 , plot[i][j]); + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a renewal process. + * + * \return MultiPlotSet object. 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* Renewal::get_plotable() const + +{ + return get_plotable(renewal_data); +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/renewal_algorithms.cpp b/src/cpp/sequence_analysis/renewal_algorithms.cpp new file mode 100644 index 0000000..3ea6ede --- /dev/null +++ b/src/cpp/sequence_analysis/renewal_algorithms.cpp @@ -0,0 +1,1513 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include "stat_tool/stat_label.h" + +#include "renewal.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the information quantity of a TimeEvents object. + */ +/*--------------------------------------------------------------*/ + +double TimeEvents::information_computation() const + +{ + int i; + double information , buff; + + + information = htime->information_computation(); + + if (information != D_INF) { + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i] > 0) { + buff = hnb_event[i]->information_computation(); + + if (buff != D_INF) { + information += buff; + } + else { + information = D_INF; + break; + } + } + } + } + + return information; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood of a mixture of number of events distributions + * for pairs {observation period, number of events}. + * + * \param[in] timev reference on a TimeEvents object. 
/*--------------------------------------------------------------*/
/**
 *  \brief Computation of the log-likelihood of a mixture of number of events distributions
 *         for pairs {observation period, number of events}.
 *
 *  The result is the log-likelihood of the observation period distribution (time)
 *  for timev.htime, plus, for every observation period with a strictly positive
 *  frequency, the log-likelihood of the corresponding number of events distribution.
 *
 *  \param[in] timev reference on a TimeEvents object.
 *
 *  \return          log-likelihood, or D_INF as soon as one of the terms is D_INF.
 */
/*--------------------------------------------------------------*/

double Renewal::likelihood_computation(const TimeEvents &timev) const

{
  int i;
  double likelihood , buff;


  likelihood = time->likelihood_computation(*(timev.htime));

  if (likelihood != D_INF) {
    for (i = timev.htime->offset;i < timev.htime->nb_value;i++) {
      if (timev.htime->frequency[i] > 0) {

        // nb_event is indexed by the observation period
        // NOTE(review): assumes nb_event[i] exists for every observed period i - confirm against caller

        buff = nb_event[i]->likelihood_computation(*(timev.hnb_event[i]));

        if (buff != D_INF) {
          likelihood += buff;
        }
        else {

          // a single impossible observation makes the whole sample impossible

          likelihood = D_INF;
          break;
        }
      }
    }
  }

  return likelihood;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Computation of the inter-event distribution reestimation quantities
 *         (EM estimator of an ordinary renewal process on the basis of count data).
 *
 *  The frequency array of inter_event_reestim is reset over its whole allocated
 *  size, then accumulated over the timev.nb_class triples
 *  {observation period, number of events, frequency}, and finally its
 *  characteristics (nb_value, offset, nb_element, max, mean, variance) are recomputed.
 *
 *  NOTE(review): the template argument list of Reestimation looks like it was
 *  stripped from this copy of the file - confirm the exact parameter type
 *  against the class declaration.
 *
 *  \param[in] timev               reference on the pairs {observation period, number of events},
 *  \param[in] inter_event_reestim pointer on the reestimation quantities (overwritten).
 */
/*--------------------------------------------------------------*/

void Renewal::expectation_step(const TimeEvents &timev ,
                               Reestimation *inter_event_reestim) const

{
  int i , j;
  int min_time , max_time , *ptime , *pnb_event , *pfrequency;
  double num , denom , *ifrequency , *pmass;


  // initialization

  ifrequency = inter_event_reestim->frequency;
  for (i = 0;i < inter_event_reestim->alloc_nb_value;i++) {
    *ifrequency++ = 0.;
  }

  // the three arrays are walked in parallel, one entry per class

  ptime = timev.time;
  pnb_event = timev.nb_event;
  pfrequency = timev.frequency;

  for (i = 0;i < timev.nb_class;i++) {

    // case no event

    if (*pnb_event == 0) {

      // only inter-event times strictly longer than the observation period are credited

      min_time = MAX(inter_event->offset , *ptime + 1);

      if (min_time < inter_event->nb_value) {
        denom = nb_event[*ptime]->mass[*pnb_event];

        if (denom > 0.) {
          ifrequency = inter_event_reestim->frequency + min_time;
          pmass = inter_event->mass + min_time;
          for (j = min_time;j < inter_event->nb_value;j++) {
            *ifrequency++ += *pfrequency * *pmass++ / denom;
          }
        }
      }
    }

    // case number of events > 0

    else {

      // denom stays at 0 (class skipped) when the observed count is outside
      // the support of the number of events distribution

      denom = 0.;
      if (*pnb_event < nb_event[*ptime]->nb_value) {
        denom = nb_event[*ptime]->mass[*pnb_event];
      }

      if (denom > 0.) {
        max_time = MIN(nevent_time[*pnb_event]->nb_value - 1 , *ptime);

        ifrequency = inter_event_reestim->frequency + inter_event->offset;
        pmass = inter_event->mass + inter_event->offset;

        for (j = inter_event->offset;j < inter_event->nb_value;j++) {
          num = 0.;

          // case number of events = 1: complete time interval

          if (*pnb_event == 1) {
            if ((*ptime - j >= 0) && (*ptime - j < nevent_time[*pnb_event]->nb_value)) {
              num = 1. - nevent_time[*pnb_event]->cumul[*ptime - j];
            }
          }

          // case number of events > 1: complete time intervals

          else {
            if (*ptime - j >= nevent_time[*pnb_event - 1]->offset) {
              if (*ptime - j < nevent_time[*pnb_event - 1]->nb_value) {
                num = *pnb_event * (nevent_time[*pnb_event - 1]->cumul[*ptime - j] -
                                    nevent_time[*pnb_event]->cumul[*ptime - j]);
              }
              else if (*ptime - j < nevent_time[*pnb_event]->nb_value) {
                num = *pnb_event * (1. - nevent_time[*pnb_event]->cumul[*ptime - j]);
              }
            }
          }

          // right-censored time interval

          if ((max_time > *ptime - j) && (max_time >= nevent_time[*pnb_event]->offset)) {
            num += nevent_time[*pnb_event]->cumul[max_time];
            if (*ptime - j >= nevent_time[*pnb_event]->offset) {
              num -= nevent_time[*pnb_event]->cumul[*ptime - j];
            }
          }

          *ifrequency++ += *pfrequency * *pmass++ * num / denom;
        }
      }
    }

    ptime++;
    pnb_event++;
    pfrequency++;
  }

  // update of the characteristics of the reestimation quantities

  inter_event_reestim->nb_value_computation();
  inter_event_reestim->offset_computation();
  inter_event_reestim->nb_element_computation();
  inter_event_reestim->max_computation();
  inter_event_reestim->mean_computation();
  inter_event_reestim->variance_computation();

# ifdef DEBUG
  cout << "\n" << (timev.mixture->mean + 1) * timev.nb_element << " | "
       << " " << inter_event_reestim->nb_element << endl;

  cout << "\nthe reestimation quantities inter-event distribution:" << *inter_event_reestim << endl;
# endif

}
+ */ +/*--------------------------------------------------------------*/ + +void Renewal::expectation_step(const TimeEvents &timev , + Reestimation *inter_event_reestim , + Reestimation *length_bias_reestim , + censoring_estimator estimator , bool combination , + duration_distribution_mean_estimator mean_estimator) const + +{ + int i , j; + int max_time , offset , nb_value , *ptime , *pnb_event , *pfrequency; + double complete_num , censored_num , denom , inter_event_mean , *ifrequency , + *lfrequency , *pmass; + + + // initializations + + ifrequency = inter_event_reestim->frequency; + for (i = 0;i < inter_event_reestim->alloc_nb_value;i++) { + *ifrequency++ = 0.; + } + + if (estimator == COMPLETE_LIKELIHOOD) { + lfrequency = length_bias_reestim->frequency; + for (i = 0;i < length_bias_reestim->alloc_nb_value;i++) { + *lfrequency++ = 0.; + } + } + + ptime = timev.time; + pnb_event = timev.nb_event; + pfrequency = timev.frequency; + + for (i = 0;i < timev.nb_class;i++) { + + // case no event + + if (*pnb_event == 0) { + if ((estimator == COMPLETE_LIKELIHOOD) && (*ptime + 1 < inter_event->nb_value)) { + denom = nb_event[*ptime]->mass[*pnb_event] * inter_event->mean; + + if (denom > 0.) { + lfrequency = length_bias_reestim->frequency + *ptime + 1; + pmass = inter_event->mass + *ptime + 1; + for (j = *ptime + 1;j < inter_event->nb_value;j++) { + *lfrequency++ += *pfrequency * (j - *ptime) * *pmass++ / denom; + } + } + } + } + + // case number of events > 0 + + else { + denom = 0.; + if (*pnb_event < nb_event[*ptime]->nb_value) { + denom = nb_event[*ptime]->mass[*pnb_event]; + } + + if (denom > 0.) 
{ + + // left-censored time interval + +/* if (estimator == COMPLETE_LIKELIHOOD) { + lfrequency = length_bias_reestim->frequency + inter_event->offset; + pmass = inter_event->mass + inter_event->offset; + num = 0.; + + for (j = 1;j < inter_event->nb_value;j++) { + if (j <= *ptime) { + + // case 1 event + + if (*pnb_event == 1) { + if (*ptime - j < aux_nevent_time[*pnb_event]->nb_value) { + num += 1. - aux_nevent_time[*pnb_event]->cumul[*ptime - j]; + } + } + + // case number of events > 1 + + else { + if (*ptime - j >= aux_nevent_time[*pnb_event - 1]->offset) { + if (*ptime - j < aux_nevent_time[*pnb_event - 1]->nb_value) { + num += aux_nevent_time[*pnb_event - 1]->cumul[*ptime - j] - + aux_nevent_time[*pnb_event]->cumul[*ptime - j]; + } + else if (*ptime - j < aux_nevent_time[*pnb_event]->nb_value) { + num += 1. - aux_nevent_time[*pnb_event]->cumul[*ptime - j]; + } + } + } + } + + if (j >= inter_event->offset) { + *lfrequency++ += *pfrequency * *pmass++ * num / (denom * inter_event->mean); + } + } + } */ + + max_time = MIN(nevent_time[*pnb_event]->nb_value - 1 , *ptime); + + ifrequency = inter_event_reestim->frequency + inter_event->offset; + if (estimator == COMPLETE_LIKELIHOOD) { + lfrequency = length_bias_reestim->frequency + inter_event->offset; + } + pmass = inter_event->mass + inter_event->offset; + + for (j = inter_event->offset;j < inter_event->nb_value;j++) { + complete_num = 0.; + + // case number of events > 1: complete time intervals + + if (*pnb_event > 1) { + if (*ptime - j >= nevent_time[*pnb_event - 1]->offset) { + if (*ptime - j < nevent_time[*pnb_event - 1]->nb_value) { + complete_num = (*pnb_event - 1) * (nevent_time[*pnb_event - 1]->cumul[*ptime - j] - + nevent_time[*pnb_event]->cumul[*ptime - j]); + } + else if (*ptime - j < nevent_time[*pnb_event]->nb_value) { + complete_num = (*pnb_event - 1) * (1. 
- nevent_time[*pnb_event]->cumul[*ptime - j]); + } + } + } + + // left- and right-censored time intervals + + censored_num = 0.; + if ((max_time > *ptime - j) && (max_time >= nevent_time[*pnb_event]->offset)) { + censored_num += nevent_time[*pnb_event]->cumul[max_time]; + if (*ptime - j >= nevent_time[*pnb_event]->offset) { + censored_num -= nevent_time[*pnb_event]->cumul[*ptime - j]; + } + } + + *ifrequency++ += *pfrequency * *pmass * (complete_num + censored_num) / denom; + if (estimator == COMPLETE_LIKELIHOOD) { + *lfrequency++ += *pfrequency * *pmass * censored_num / denom; + } + pmass++; + } + } + } + + ptime++; + pnb_event++; + pfrequency++; + } + + switch (estimator) { + + case PARTIAL_LIKELIHOOD : { + inter_event_reestim->nb_value_computation(); + inter_event_reestim->offset_computation(); + inter_event_reestim->nb_element_computation(); + break; + } + + case COMPLETE_LIKELIHOOD : { + offset = 1; + nb_value = inter_event_reestim->alloc_nb_value; + + ifrequency = inter_event_reestim->frequency + inter_event_reestim->alloc_nb_value; + lfrequency = length_bias_reestim->frequency + inter_event_reestim->alloc_nb_value; + while ((*--ifrequency == 0) && (*--lfrequency == 0) && (nb_value > 2)) { + nb_value--; + } + inter_event_reestim->nb_value = nb_value; + length_bias_reestim->nb_value = nb_value; + + ifrequency = inter_event_reestim->frequency + offset; + lfrequency = length_bias_reestim->frequency + offset; + while ((*ifrequency++ == 0) && (*lfrequency++ == 0) && (offset < nb_value - 1)) { + offset++; + } + inter_event_reestim->offset = offset; + length_bias_reestim->offset = offset; + + inter_event_reestim->nb_element_computation(); + length_bias_reestim->nb_element_computation(); + break; + +# ifdef DEBUG + inter_event_reestim->max_computation(); + inter_event_reestim->mean_computation(); + inter_event_reestim->variance_computation(); + + length_bias_reestim->max_computation(); + length_bias_reestim->mean_computation(); + 
length_bias_reestim->variance_computation(); + + cout << "\n" << timev.nb_element << " | " << length_bias_reestim->nb_element << " || " + << timev.mixture->mean * timev.nb_element << " | " << " " << inter_event_reestim->nb_element << endl; + + cout << "\nthe reestimation quantities inter-event distribution:" << *inter_event_reestim << endl; + cout << "\nthe reestimation quantities length-biased distribution:" << *length_bias_reestim << endl; +# endif + + } + } + + if ((estimator == COMPLETE_LIKELIHOOD) && (combination)) { + switch (mean_estimator) { + case ESTIMATED : + inter_event_mean = timev.htime->mean / timev.mixture->mean; + break; + case COMPUTED : + inter_event_mean = interval_bisection(inter_event_reestim , length_bias_reestim); + break; + case ONE_STEP_LATE : + inter_event_mean = inter_event->mean; + break; + } + +# ifdef DEBUG + if (mean_estimator != ESTIMATED) { + cout << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_MEAN] << ": " + << inter_event_mean << " (" << timev.htime->mean / timev.mixture->mean << ") | "; + } +# endif + + inter_event_reestim->equilibrium_process_combination(length_bias_reestim , inter_event_mean); + } + + else { + inter_event_reestim->max_computation(); + inter_event_reestim->mean_computation(); + inter_event_reestim->variance_computation(); + } + +# ifdef DEBUG + cout << "\nthe reestimation quantities inter-event distribution:" << *inter_event_reestim << endl; +# endif + +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a renewal process on the basis of pairs + * {observation period, number of events} using the EM algorithm. 
 *
 *  The input checks are performed first and every failure is reported through error.
 *  The inter-event distribution is then reestimated iteratively (expectation step,
 *  estimation step, likelihood computation). When estimator is PARAMETRIC_REGULARIZATION,
 *  a second iterative stage fits a parametric distribution.
 *
 *  \param[in] error                 reference on a StatError object,
 *  \param[in] os                    stream for displaying estimation intermediate results
 *                                   (every write is guarded by a test on os),
 *  \param[in] type                  renewal process type (ORDINARY/EQUILIBRIUM),
 *  \param[in] iinter_event          reference on the initial inter-event distribution,
 *  \param[in] estimator             estimator type (maximum likelihood or penalized likelihood or
 *                                   estimation of a parametric distribution),
 *  \param[in] nb_iter               number of iterations (I_DEFAULT: the loop stops on the relative
 *                                   likelihood increase or after RENEWAL_NB_ITER iterations),
 *  \param[in] equilibrium_estimator estimator type in the case of an equilibrium renewal process,
 *  \param[in] mean_estimator        method of computation of the inter-event distribution mean,
 *  \param[in] weight                penalty weight (D_DEFAULT: RENEWAL_DIFFERENCE_WEIGHT or
 *                                   RENEWAL_ENTROPY_WEIGHT depending on pen_type),
 *  \param[in] pen_type              penalty type,
 *  \param[in] outside               management of side effects (zero outside the support or
 *                                   continuation of the distribution).
 *
 *  \return                          Renewal object, or NULL if an input check fails or
 *                                   the final likelihood is D_INF (error is updated).
 */
/*--------------------------------------------------------------*/

Renewal* TimeEvents::estimation(StatError &error , ostream *os , process_type type ,
                                const DiscreteParametric &iinter_event , estimation_criterion estimator ,
                                int nb_iter , censoring_estimator equilibrium_estimator ,
                                duration_distribution_mean_estimator mean_estimator ,
                                double weight , penalty_type pen_type , side_effect outside) const

{
  bool status = true;
  int i;
  int nb_likelihood_decrease;
  double likelihood , previous_likelihood , information , hlikelihood , inter_event_mean , *penalty;
  DiscreteParametric *pinter_ev;
  Reestimation *inter_event_reestim , *length_bias_reestim;
  FrequencyDistribution *hreestim;
  Renewal *renew;


  renew = NULL;
  error.init();

  // input checks: all of them are run so that error collects every failure

  if (mixture->nb_value <= 2) {
    status = false;
    error.update(SEQ_error[SEQR_MAX_NB_EVENT_TOO_SMALL]);
  }
  if (mixture->mean < MIN_NB_EVENT) {
    status = false;
    error.update(SEQ_error[SEQR_NB_EVENT_TOO_SMALL]);
  }

  if (min_inter_event_computation() < MIN_INTER_EVENT) {
    status = false;
    error.update(SEQ_error[SEQR_TIME_UNIT]);
  }

  if ((nb_iter != I_DEFAULT) && (nb_iter < 1)) {
    status = false;
    error.update(STAT_error[STATR_NB_ITERATION]);
  }

  if ((weight != D_DEFAULT) && (weight <= 0.)) {
    status = false;
    error.update(STAT_error[STATR_PENALTY_WEIGHT]);
  }

  if (status) {
    information = information_computation();

    // construction of a Renewal object

    renew = new Renewal(type , *htime , iinter_event);
    renew->renewal_data = new RenewalData(*this , type);

    pinter_ev = renew->inter_event;

    if (estimator == PENALIZED_LIKELIHOOD) {
      penalty = new double[pinter_ev->alloc_nb_value];

      // default penalty weight selected from the penalty type

      if (weight == D_DEFAULT) {
        if (pen_type != ENTROPY) {
          weight = RENEWAL_DIFFERENCE_WEIGHT;
        }
        else {
          weight = RENEWAL_ENTROPY_WEIGHT;
        }
      }

      // scaling of the penalty weight by the sample size

      if (equilibrium_estimator == PARTIAL_LIKELIHOOD) {
        weight *= mixture->mean * nb_element;
      }
      else {
        weight *= (mixture->mean + 1) * nb_element;
      }
    }

    inter_event_reestim = new Reestimation(pinter_ev->alloc_nb_value);

    // NOTE(review): length_bias_reestim is assigned only here; for an EQUILIBRIUM process with
    // another equilibrium_estimator it is still passed, unassigned, to expectation_step() below.
    // Confirm that the callee does not read it in that case.

    if (type == EQUILIBRIUM) {
      if (equilibrium_estimator == COMPLETE_LIKELIHOOD) {
        length_bias_reestim = new Reestimation(pinter_ev->alloc_nb_value);
      }
    }

    renew->computation();

    renew->init(CATEGORICAL , I_DEFAULT , I_DEFAULT , D_DEFAULT , D_DEFAULT);
    likelihood = D_INF;
    i = 0;

    do {
      i++;

      // computation of the reestimation quantities

      switch (type) {
      case ORDINARY :
        renew->expectation_step(*this , inter_event_reestim);
        break;
      case EQUILIBRIUM :
        renew->expectation_step(*this , inter_event_reestim ,
                                length_bias_reestim , equilibrium_estimator);
        break;
      }

      // reestimation of the inter-event distribution without penalty

      if (estimator != PENALIZED_LIKELIHOOD) {
        if ((type == ORDINARY) || (equilibrium_estimator == PARTIAL_LIKELIHOOD)) {
          inter_event_reestim->distribution_estimation(pinter_ev);
        }

        else {
          switch (mean_estimator) {
          case ESTIMATED :
            inter_event_mean = htime->mean / mixture->mean;
            break;
          case COMPUTED :
            inter_event_mean = interval_bisection(inter_event_reestim , length_bias_reestim);
            break;
          case ONE_STEP_LATE :
            inter_event_mean = pinter_ev->mean;
            break;
          }

          inter_event_reestim->equilibrium_process_estimation(length_bias_reestim , pinter_ev ,
                                                              inter_event_mean);
        }
      }

      // penalized reestimation of the inter-event distribution

      else {
        if ((type == ORDINARY) || (equilibrium_estimator == PARTIAL_LIKELIHOOD)) {
          inter_event_reestim->penalized_likelihood_estimation(pinter_ev , weight ,
                                                               pen_type , penalty ,
                                                               outside);
        }

        else {

          // NOTE(review): unlike the non-penalized branch, there is no COMPUTED case here;
          // inter_event_mean is not assigned in this function for that value. Confirm that
          // callers never combine COMPUTED with PENALIZED_LIKELIHOOD.

          switch (mean_estimator) {
          case ESTIMATED :
            inter_event_mean = htime->mean / mixture->mean;
            break;
          case ONE_STEP_LATE :
            inter_event_mean = pinter_ev->mean;
            break;
          }

          inter_event_reestim->penalized_likelihood_equilibrium_process_estimation(length_bias_reestim ,
                                                                                   pinter_ev , inter_event_mean ,
                                                                                   weight , pen_type ,
                                                                                   penalty , outside);
        }
      }

      // computation of the mixture of number of events distributions and the associated log-likelihood

      renew->computation();

      previous_likelihood = likelihood;
      likelihood = renew->likelihood_computation(*this);

      // display thinned out as the iteration count grows (every iteration below 10,
      // then every 10, 100 and 1000 iterations)

      if ((os) && ((i < 10) || ((i < 100) && (i % 10 == 0)) || ((i < 1000) && (i % 100 == 0)) || (i % 1000 == 0))) {
        *os << STAT_label[STATL_ITERATION] << " " << i << " "
            << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " "
            << STAT_label[STATL_DEVIANCE] << ": " << 2 * (information - likelihood) << " "
            << STAT_label[STATL_SMOOTHNESS] << ": " << pinter_ev->second_difference_norm_computation();
        if (estimator == PENALIZED_LIKELIHOOD) {
          *os << " cumul: " << pinter_ev->cumul[pinter_ev->nb_value - 1];
        }
        *os << endl;
      }
    }

    // the loop continues while likelihood differs from D_INF and either (nb_iter == I_DEFAULT)
    // fewer than RENEWAL_NB_ITER iterations were run with a relative likelihood increase above
    // RENEWAL_LIKELIHOOD_DIFF, or (nb_iter given) fewer than nb_iter iterations were run

    while ((likelihood != D_INF) && (((nb_iter == I_DEFAULT) && (i < RENEWAL_NB_ITER) &&
             ((likelihood - previous_likelihood) / -likelihood > RENEWAL_LIKELIHOOD_DIFF)) ||
            ((nb_iter != I_DEFAULT) && (i < nb_iter))));

    if (likelihood != D_INF) {
      if (os) {
        *os << "\n" << i << " " << STAT_label[STATL_ITERATIONS] << " "
            << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " "
            << STAT_label[STATL_DEVIANCE] << ": " << 2 * (information - likelihood) << " "
            << STAT_label[STATL_SMOOTHNESS] << ": " << pinter_ev->second_difference_norm_computation();
        if (estimator == PENALIZED_LIKELIHOOD) {
          *os << " cumul: " << pinter_ev->cumul[pinter_ev->nb_value - 1];
        }
        *os << endl;
      }

      // second stage: iterative estimation of a parametric inter-event distribution

      if (estimator == PARAMETRIC_REGULARIZATION) {
        hreestim = new FrequencyDistribution(pinter_ev->alloc_nb_value);

        likelihood = D_INF;
        nb_likelihood_decrease = 0;

        i = 0;
        do {
          i++;

          // computation of the reestimation quantities

          switch (type) {
          case ORDINARY :
            renew->expectation_step(*this , inter_event_reestim);
            break;
          case EQUILIBRIUM :
            renew->expectation_step(*this , inter_event_reestim ,
                                    length_bias_reestim , equilibrium_estimator ,
                                    true , mean_estimator);
            break;
          }

          // parametric estimation on the basis of the reestimation quantities transferred to hreestim

          hreestim->update(inter_event_reestim , (int)(inter_event_reestim->nb_element *
                           MAX(sqrt(inter_event_reestim->variance) , 1.) * RENEWAL_COEFF));
          hlikelihood = hreestim->Reestimation::type_parametric_estimation(pinter_ev , 1 , true ,
                                                                           RENEWAL_THRESHOLD);

          if (hlikelihood == D_INF) {
            likelihood = D_INF;
          }

          // computation of the mixture of number of events distributions and the associated log-likelihood

          else {
            renew->init(pinter_ev->ident , pinter_ev->inf_bound , pinter_ev->sup_bound ,
                        pinter_ev->parameter , pinter_ev->probability);
            renew->computation();

            previous_likelihood = likelihood;
            likelihood = renew->likelihood_computation(*this);

            // count of consecutive likelihood decreases (used in the stopping condition below)

            if (likelihood < previous_likelihood) {
              nb_likelihood_decrease++;
            }
            else {
              nb_likelihood_decrease = 0;
            }

# ifdef DEBUG
            if ((i < 10) || (i % 10 == 0)) {
              cout << STAT_label[STATL_ITERATION] << " " << i << " "
                   << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " "
                   << STAT_label[STATL_SMOOTHNESS] << ": " << pinter_ev->second_difference_norm_computation() << endl;
            }
# endif

          }
        }
        while ((likelihood != D_INF) && (i < RENEWAL_NB_ITER) &&
               (((likelihood - previous_likelihood) / -likelihood > RENEWAL_LIKELIHOOD_DIFF) ||
                (hlikelihood == D_INF) || (nb_likelihood_decrease == 1)));

        delete hreestim;

        if ((os) && (likelihood != D_INF)) {
          *os << "\n" << i << " " << STAT_label[STATL_ITERATIONS] << " "
              << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " "
              << STAT_label[STATL_SMOOTHNESS] << ": " << pinter_ev->second_difference_norm_computation() << endl;
        }
      }
    }

    // release of the working arrays, under the same conditions as their allocation

    if (estimator == PENALIZED_LIKELIHOOD) {
      delete [] penalty;
    }

    delete inter_event_reestim;

    if (type == EQUILIBRIUM) {
      if (equilibrium_estimator == COMPLETE_LIKELIHOOD) {
        delete length_bias_reestim;
      }
    }

    if (likelihood != D_INF) {

      // update of the number of free parameters

      pinter_ev->nb_parameter_update();
      for (i = renew->time->offset;i < renew->time->nb_value;i++) {
        if (renew->time->mass[i] > 0.) {
          renew->nb_event[i]->nb_parameter = pinter_ev->nb_parameter;
        }
      }
      renew->mixture->nb_parameter = pinter_ev->nb_parameter;
    }

    // estimation failure: the Renewal object is discarded and the failure is reported

    else {
      delete renew;
      renew = NULL;
      error.update(STAT_error[STATR_ESTIMATION_FAILURE]);
    }
  }

  return renew;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Estimation of a renewal process on the basis of pairs
 *         {observation period, number of events} using the EM algorithm.
 *
 *  \param[in] error                 reference on a StatError object,
 *  \param[in] os                    stream for displaying estimation intermediate results,
 *  \param[in] type                  renewal process type (ORDINARY/EQUILIBRIUM),
 *  \param[in] estimator             estimator type (maximum likelihood or penalized likelihood or
 *                                   estimation of a parametric distribution),
 *  \param[in] nb_iter               number of iterations,
 *  \param[in] equilibrium_estimator estimator type in the case of an equilibrium renewal process,
 *  \param[in] mean_estimator        method of computation of the inter-event distribution mean,
 *  \param[in] weight                penalty weight,
 *  \param[in] pen_type              penalty type,
 *  \param[in] outside               management of side effects (zero outside the support or
 *                                   continuation of the distribution).
+ * + * \return Renewal object. + */ +/*--------------------------------------------------------------*/ + +Renewal* TimeEvents::estimation(StatError &error , ostream *os , process_type type , + estimation_criterion estimator , int nb_iter , + censoring_estimator equilibrium_estimator , + duration_distribution_mean_estimator mean_estimator , double weight , + penalty_type pen_type , side_effect outside) const + +{ + double proba; + DiscreteParametric *iinter_event; + Renewal *renew; + + + proba = mixture->mean / htime->mean; + if (proba > 1. - RENEWAL_INIT_PROBABILITY) { + proba = 1. - RENEWAL_INIT_PROBABILITY; + } + else if (proba < RENEWAL_INIT_PROBABILITY) { + proba = RENEWAL_INIT_PROBABILITY; + } + + iinter_event = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 , I_DEFAULT , 1. , + proba , RENEWAL_THRESHOLD); + +# ifdef DEBUG + iinter_event->ascii_print(cout); +# endif + + renew = estimation(error , os , type , *iinter_event , estimator , nb_iter , + equilibrium_estimator , mean_estimator , weight , + pen_type , outside); + delete iinter_event; + + return renew; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of an equilibrium renewal process on the basis of + * time interval data using the EM algorithm. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] iinter_event reference on the initial inter-event distribution, + * \param[in] estimator estimator type (maximum likelihood or penalized likelihood), + * \param[in] nb_iter number of iterations, + * \param[in] mean_estimator method of computation of the inter-event distribution mean, + * \param[in] weight penalty weight, + * \param[in] pen_type penalty type, + * \param[in] outside management of side effects (zero outside the support or + * continuation of the distribution). + * + * \return Renewal object. 
 */
/*--------------------------------------------------------------*/

Renewal* RenewalData::estimation(StatError &error , ostream *os ,
                                 const DiscreteParametric &iinter_event ,
                                 estimation_criterion estimator , int nb_iter ,
                                 duration_distribution_mean_estimator mean_estimator , double weight ,
                                 penalty_type pen_type , side_effect outside) const

{
  int i , j;
  int *psequence;
  DiscreteParametricModel *inter_event;
  Renewal *renew;
  FrequencyDistribution *within_backward , *within_forward , *no_event;


  within_backward = new FrequencyDistribution(MIN(backward->nb_value , htime->nb_value - 1));
  within_forward = new FrequencyDistribution(MIN(forward->nb_value , htime->nb_value));
  no_event = new FrequencyDistribution(htime->nb_value);

  for (i = 0;i < nb_element;i++) {

    // scan from the end of the sequence: j is the number of values following the last 1

    psequence = sequence[i] + length[i];
    for (j = 0;j < length[i];j++) {
      if (*--psequence == 1) {
        (within_backward->frequency[j])++;
        break;
      }
    }

    // scan from the beginning of the sequence: the first 1 found at index j is counted at j + 1

    psequence = sequence[i];
    for (j = 0;j < length[i];j++) {
      if (*psequence++ == 1) {
        (within_forward->frequency[j + 1])++;
        break;
      }
    }

    // the forward scan reached the end without finding a 1: sequence counted at its length

    if (j == length[i]) {
      (no_event->frequency[j])++;
    }
  }

  within_backward->nb_value_computation();
  within_backward->offset_computation();
  within_backward->nb_element_computation();

# ifdef DEBUG
  within_backward->max_computation();
  within_backward->mean_computation();
  within_backward->variance_computation();

  cout << *within_backward;
  cout << *backward;
# endif

  within_forward->nb_value_computation();
  within_forward->offset_computation();
  within_forward->nb_element_computation();

# ifdef DEBUG
  within_forward->max_computation();
  within_forward->mean_computation();
  within_forward->variance_computation();

  cout << *within_forward;
  cout << *forward;
# endif

  no_event->nb_value_computation();
  no_event->offset_computation();
  no_event->nb_element_computation();

# ifdef DEBUG
  no_event->max_computation();
  no_event->mean_computation();
  no_event->variance_computation();

  cout << *no_event;
# endif

  // an empty no-event frequency distribution is passed as a null pointer

  if (no_event->nb_element == 0) {
    delete no_event;
    no_event = NULL;
  }

  inter_event = within->estimation(error , os , *within_backward , *within_forward , no_event ,
                                   iinter_event , estimator , nb_iter , mean_estimator ,
                                   weight , pen_type , outside , htime->mean / mixture->mean);

  delete within_backward;
  delete within_forward;
  delete no_event;

  // NOTE(review): inter_event is passed by reference to the Renewal constructor and is never
  // deleted in this function - presumably the constructor copies it, in which case the object
  // returned by within->estimation() leaks; confirm its ownership

  if (inter_event) {
    renew = new Renewal(*this , *((DiscreteParametric*)inter_event));
  }
  else {
    renew = NULL;
  }

  return renew;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Estimation of an equilibrium renewal process on the basis of
 *         time interval data using the EM algorithm.
 *
 *  The initial inter-event distribution is a negative binomial distribution whose probability
 *  is mixture->mean / htime->mean, kept between RENEWAL_INIT_PROBABILITY and
 *  1 - RENEWAL_INIT_PROBABILITY; the estimation itself is delegated to the overloaded
 *  function taking an initial inter-event distribution.
 *
 *  \param[in] error          reference on a StatError object,
 *  \param[in] os             stream for displaying estimation intermediate results,
 *  \param[in] estimator      estimator type (maximum likelihood or penalized likelihood),
 *  \param[in] nb_iter        number of iterations,
 *  \param[in] mean_estimator method of computation of the inter-event distribution mean,
 *  \param[in] weight         penalty weight,
 *  \param[in] pen_type       penalty type,
 *  \param[in] outside        management of side effects (zero outside the support or
 *                            continuation of the distribution).
 *
 *  \return                   Renewal object.
 */
/*--------------------------------------------------------------*/

Renewal* RenewalData::estimation(StatError &error , ostream *os , estimation_criterion estimator ,
                                 int nb_iter , duration_distribution_mean_estimator mean_estimator ,
                                 double weight , penalty_type pen_type , side_effect outside) const

{
  double proba;
  DiscreteParametric *iinter_event;
  Renewal *renew;


  proba = mixture->mean / htime->mean;
  if (proba > 1. - RENEWAL_INIT_PROBABILITY) {
    proba = 1. - RENEWAL_INIT_PROBABILITY;
  }
  else if (proba < RENEWAL_INIT_PROBABILITY) {
    proba = RENEWAL_INIT_PROBABILITY;
  }

  iinter_event = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 , I_DEFAULT , 1. ,
                                        proba , RENEWAL_THRESHOLD);

# ifdef DEBUG
  iinter_event->ascii_print(cout);
# endif

  renew = estimation(error , os , *iinter_event , estimator , nb_iter ,
                     mean_estimator , weight , pen_type , outside);
  delete iinter_event;

  return renew;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Simulation using a renewal process.
 *
 *  One 0/1 sequence is simulated for each observation period of ihtime; the frequency
 *  distributions of the returned object (inter_event, within, length_bias, backward,
 *  forward) are incremented during the simulation and their characteristics are
 *  computed afterwards.
 *
 *  \param[in] error  reference on a StatError object,
 *  \param[in] itype  renewal process type (ORDINARY/EQUILIBRIUM),
 *  \param[in] ihtime observation frequency distribution.
 *
 *  \return           RenewalData object, or NULL if a check on ihtime fails (error is updated).
 */
/*--------------------------------------------------------------*/

RenewalData* Renewal::simulation(StatError &error , process_type itype ,
                                 const FrequencyDistribution &ihtime) const

{
  bool status = true , compute;
  int i , j , k , m;
  int offset , time_interval , cumul_time , *ptime , *pnb_event , *psequence;
  Distribution *dtime;
  Renewal *renew;
  RenewalData *timev;


  timev = NULL;
  error.init();

  if ((ihtime.nb_element < 1) || (ihtime.nb_element > RENEWAL_NB_ELEMENT)) {
    status = false;
    error.update(STAT_error[STATR_SAMPLE_SIZE]);
  }
  if (ihtime.offset < MAX(inter_event->offset , 2)) {
    status = false;
    error.update(SEQ_error[SEQR_SHORT_OBSERVATION_TIME]);
  }
  if (ihtime.nb_value - 1 > MAX_TIME) {
    status = false;
    error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]);
  }

  if (status) {
    dtime = new Distribution(ihtime);

    // the copied renewal process is recomputed at the end only if the process type or
    // the observation distribution differs from the current ones

    if ((itype != type) || (*dtime != *time)) {
      compute = true;
    }
    else {
      compute = false;
    }

    // construction of a RenewalData object

    timev = new RenewalData(itype , *this);

    timev->renewal = new Renewal(*this , false);
    renew = timev->renewal;

    timev->length = new int[ihtime.nb_element];
    timev->sequence = new int*[ihtime.nb_element];

    switch (itype) {
    case ORDINARY :
      offset = 0;
      break;
    case EQUILIBRIUM :
      offset = 1;
      break;
    }

    // 1st to nth event

    ptime = new int[ihtime.nb_element];
    pnb_event = new int[ihtime.nb_element];

    i = 0;
    for (j = ihtime.offset;j < ihtime.nb_value;j++) {
      for (k = 0;k < ihtime.frequency[j];k++) {

        // time to the 1st event (equilibrium renewal process)
        // after the 1st sequence, the time is derived from the previous cumulated time and
        // the previous observation period instead of being simulated again

        if (itype == EQUILIBRIUM) {
          if (i == 0) {
            cumul_time = renew->forward->simulation();
          }
          else {
            cumul_time -= *(ptime - 1);
          }
          (timev->forward->frequency[cumul_time])++;

          time_interval = cumul_time;
        }

        // observation period

        *ptime = j;

        timev->length[i] = *ptime + 1 - offset;
        timev->sequence[i] = new int[timev->length[i]];

        // time to the 1st event (ordinary renewal process)

        if (itype == ORDINARY) {
          time_interval = renew->inter_event->simulation();
          cumul_time = time_interval;
          (timev->inter_event->frequency[time_interval])++;
          if (time_interval <= *ptime) {
            (timev->within->frequency[time_interval])++;
          }
          else {
            (timev->length_bias->frequency[time_interval])++;
          }
        }

        // psequence is pre-incremented before each write: it starts one position before the sequence

        psequence = timev->sequence[i] - 1;
        if (itype == ORDINARY) {
          *++psequence = 1;
        }
        for (m = 1;m < MIN(time_interval , *ptime + 1);m++) {
          *++psequence = 0;
        }
        if (time_interval <= *ptime) {
          *++psequence = 1;
        }

        // following events until the cumulated time exceeds the observation period

        *pnb_event = 0;
        while (cumul_time <= *ptime) {
          (*pnb_event)++;
          time_interval = renew->inter_event->simulation();
          cumul_time += time_interval;

          for (m = cumul_time - time_interval + 1;m < MIN(cumul_time , *ptime + 1);m++) {
            *++psequence = 0;
          }

          (timev->inter_event->frequency[time_interval])++;
          if (cumul_time <= *ptime) {
            (timev->within->frequency[time_interval])++;
            *++psequence = 1;
          }
          else {
            (timev->length_bias->frequency[time_interval])++;
          }
        }

        (timev->backward->frequency[*ptime - (cumul_time - time_interval)])++;
        if (itype == ORDINARY) {
          (timev->forward->frequency[cumul_time - *ptime])++;
        }

        ptime++;
        pnb_event++;
        i++;
      }
    }

    // both pointers are moved back to the beginning of their arrays

    ptime -= ihtime.nb_element;
    pnb_event -= ihtime.nb_element;

    // construction of the triplets {observation period, number of events, frequency} and of
    // the observation period frequency distribution and number of events frequency distributions

    timev->build(ihtime.nb_element , ptime , pnb_event);
    delete [] ptime;
    delete [] pnb_event;

    // extraction of the characteristics of the inter-event frequency distribution,
    // the frequency distribution of time intervals between events within the observation period,
    // the length-biased frequency distribution,
    // the backward and forward recurrence time frequency distributions,

    timev->inter_event->nb_value_computation();
    timev->inter_event->offset_computation();
    timev->inter_event->nb_element_computation();
    timev->inter_event->max_computation();
    timev->inter_event->mean_computation();
    timev->inter_event->variance_computation();

    timev->within->nb_value_computation();
    timev->within->offset_computation();
    timev->within->nb_element_computation();
    timev->within->max_computation();
    timev->within->mean_computation();
    timev->within->variance_computation();

    timev->length_bias->nb_value_computation();
    timev->length_bias->offset_computation();
    timev->length_bias->nb_element_computation();
    timev->length_bias->max_computation();
    timev->length_bias->mean_computation();
    timev->length_bias->variance_computation();

    timev->backward->nb_value_computation();
    timev->backward->offset_computation();
    timev->backward->nb_element_computation();
    timev->backward->max_computation();
    timev->backward->mean_computation();
    timev->backward->variance_computation();

    timev->forward->nb_value_computation();
    timev->forward->offset_computation();
    timev->forward->nb_element_computation();
    timev->forward->max_computation();
    timev->forward->mean_computation();
    timev->forward->variance_computation();

    // extraction of no-event/event probabilities as a function of time

    timev->build_index_event(offset);

    if (compute) {
      renew->computation(false , itype , dtime);
    }
    delete dtime;
  }

  return timev;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Simulation using a renewal process.
 *
 *  A frequency distribution with the nb_element observations all at itime is built and
 *  the simulation is delegated to the overloaded function taking a frequency distribution.
 *
 *  \param[in] error      reference on a StatError object,
 *  \param[in] itype      renewal process type (ORDINARY/EQUILIBRIUM),
 *  \param[in] nb_element sample size,
 *  \param[in] itime      observation period.
 *
 *  \return               RenewalData object, or NULL if a check fails (error is updated).
 */
/*--------------------------------------------------------------*/

RenewalData* Renewal::simulation(StatError &error , process_type itype ,
                                 int nb_element , int itime) const

{
  bool status = true;
  RenewalData *timev;


  timev = NULL;
  error.init();

  if ((nb_element < 1) || (nb_element > RENEWAL_NB_ELEMENT)) {
    status = false;
    error.update(STAT_error[STATR_SAMPLE_SIZE]);
  }
  if (itime < MAX(inter_event->offset , 2)) {
    status = false;
    error.update(SEQ_error[SEQR_SHORT_OBSERVATION_TIME]);
  }
  if (itime > MAX_TIME) {
    status = false;
    error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]);
  }

  if (status) {
    FrequencyDistribution htime(itime + 1);

    // single observation period itime with frequency nb_element

    htime.nb_element = nb_element;
    htime.offset = itime;
    htime.max = nb_element;
    htime.mean = itime;
    htime.variance = 0.;
    htime.frequency[itime] = nb_element;

    timev = simulation(error , itype , htime);
  }

  return timev;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Simulation using a renewal process.
 *
 *  \param[in] error      reference on a StatError object,
 *  \param[in] itype      renewal process type (ORDINARY/EQUILIBRIUM),
 *  \param[in] nb_element sample size,
 *  \param[in] itimev     reference on a TimeEvents object.
 *
 *  \return               RenewalData object.
+ */ +/*--------------------------------------------------------------*/ + +RenewalData* Renewal::simulation(StatError &error , process_type itype , + int nb_element , const TimeEvents &itimev) const + +{ + FrequencyDistribution *htime; + RenewalData *timev; + + + error.init(); + + if ((nb_element < 1) || (nb_element > RENEWAL_NB_ELEMENT)) { + timev = NULL; + error.update(STAT_error[STATR_SAMPLE_SIZE]); + } + + else { + htime = itimev.htime->frequency_scale(nb_element); + + timev = simulation(error , itype , *htime); + delete htime; + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the RenewalIterator class. + * + * \param[in] irenewal pointer on a Renewal object, + * \param[in] ilength sequence length. + */ +/*--------------------------------------------------------------*/ + +RenewalIterator::RenewalIterator(Renewal *irenewal , int ilength) + +{ + renewal = irenewal; + (renewal->nb_iterator)++; + + interval = 0; + counter = 0; + + length = ilength; + sequence = new int[length]; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a RenewalIterator object. + * + * \param[in] iter reference on a RenewalIterator object. + */ +/*--------------------------------------------------------------*/ + +void RenewalIterator::copy(const RenewalIterator &iter) + +{ + int i; + int *psequence , *isequence; + + + renewal = iter.renewal; + (renewal->nb_iterator)++; + + interval = iter.interval; + counter = iter.counter; + length = iter.length; + + sequence = new int[length]; + + psequence = sequence; + isequence = iter.sequence; + for (i = 0;i < length;i++) { + *psequence++ = *isequence++; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the RenewalIterator class. 
+ */ +/*--------------------------------------------------------------*/ + +RenewalIterator::~RenewalIterator() + +{ + (renewal->nb_iterator)--; + delete [] sequence; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the RenewalIterator class. + * + * \param[in] iter reference on a RenewalIterator object. + * + * \return RenewalIterator object. + */ +/*--------------------------------------------------------------*/ + +RenewalIterator& RenewalIterator::operator=(const RenewalIterator &iter) + +{ + if (&iter != this) { + (renewal->nb_iterator)--; + delete [] sequence; + + copy(iter); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a renewal process. + * + * \param[in] ilength sequence length, + * \param[in] type renewal process type (ORDINARY/EQUILIBRIUM). + */ +/*--------------------------------------------------------------*/ + +void RenewalIterator::simulation(int ilength , process_type type) + +{ + int i; + int offset , *psequence; + + + switch (type) { + case ORDINARY : + offset = 1; + break; + default : + offset = 0; + break; + } + + if (ilength + offset != length) { + length = ilength + offset; + delete [] sequence; + sequence = new int[length]; + } + + psequence = sequence; + + switch (type) { + case ORDINARY : + interval = renewal->inter_event->simulation(); + *psequence++ = 1; + counter = 1; + break; + case EQUILIBRIUM : + interval = renewal->forward->simulation(); + counter = 1; + break; + } + + for (i = offset;i < length;i++) { + if (counter < interval) { + *psequence++ = 0; + counter++; + } + + else { + interval = renewal->inter_event->simulation(); + *psequence++ = 1; + counter = 1; + } + } +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/renewal_distributions.cpp b/src/cpp/sequence_analysis/renewal_distributions.cpp new file mode 100644 index 0000000..d66f85f --- /dev/null +++ 
b/src/cpp/sequence_analysis/renewal_distributions.cpp @@ -0,0 +1,1013 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include "renewal.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the length-biased distribution from the inter-event distribution. + * + * \param[in] inter_event inter-event distribution. 
+ */
+/*--------------------------------------------------------------*/
+
+void LengthBias::computation(const DiscreteParametric &inter_event)
+
+{
+  int i;
+  double norm , *pmass , *imass;
+
+
+  offset = inter_event.offset;
+  nb_value = inter_event.nb_value;
+
+  // null masses below the offset
+
+  pmass = mass;
+  for (i = 0;i < offset;i++) {
+    *pmass++ = 0.;
+  }
+  imass = inter_event.mass + offset;
+
+  // computation of the normalization quantity
+
+  if (ident == CATEGORICAL) {
+    norm = inter_event.mean;
+  }
+  else {
+    norm = parametric_mean_computation();
+  }
+
+  // computation of the probability mass function:
+  // mass[i] = i * inter_event.mass[i] / norm
+
+  for (i = offset;i < nb_value;i++) {
+    *pmass++ = i * *imass++ / norm;
+  }
+
+  cumul_computation();
+
+  max_computation();
+  mean_computation();
+  variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the backward recurrence time distribution from
+ *         the inter-event distribution.
+ *
+ *  \param[in] inter_event inter-event distribution,
+ *  \param[in] time        observation period distribution.
+ */
+/*--------------------------------------------------------------*/
+
+void Backward::computation(const DiscreteParametric &inter_event , const Distribution &time)
+
+{
+  int i , j;
+  double norm , sum , *pmass , *icumul , *scumul , *tmass , *tcumul;
+
+
+  offset = 0;
+  nb_value = MIN(inter_event.nb_value - 1 , time.nb_value);
+
+  pmass = mass;
+  icumul = inter_event.cumul;
+
+  // computation of the normalization quantity
+
+  if (ident == CATEGORICAL) {
+    norm = inter_event.mean;
+  }
+  else {
+    norm = parametric_mean_computation();
+  }
+
+  // computation of the probability mass function:
+  // mass[i] = (1 - time.cumul[i]) * (1 - inter_event.cumul[i]) / norm
+  //           + time.mass[i] * sum_{j >= i} (1 - inter_event.cumul[j]) / norm
+
+  tmass = time.mass;
+  tcumul = time.cumul;
+
+  for (i = 0;i < nb_value;i++) {
+    *pmass = (1. - *tcumul) * (1. - *icumul) / norm;
+
+    // the second term is only accumulated for observation periods of positive mass
+
+    if (*tmass > 0.) {
+      scumul = icumul;
+      sum = 0.;
+      for (j = i;j < inter_event.nb_value - 1;j++) {
+        sum += (1. - *scumul++);
+      }
+
+      *pmass += *tmass * sum / norm;
+    }
+
+    pmass++;
+    icumul++;
+    tmass++;
+    tcumul++;
+  }
+
+  cumul_computation();
+
+  max_computation();
+  mean_computation();
+  variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the number of events distribution from
+ *         the inter-event distribution.
+ *
+ *  The probability of exactly i events is obtained as the difference between
+ *  two successive values, taken at the observation period, of the cumulative
+ *  distribution function of the time to the (i+1)th event.
+ *
+ *  \param[in] inter_event reference on the inter-event distribution.
+ */
+/*--------------------------------------------------------------*/
+
+void NbEvent::computation(DiscreteParametric &inter_event)
+
+{
+  int i , j;
+  int time_nb_value;
+  double bcumul = 1. , previous_cumul = 1. , *pmass , *pcumul;
+  DiscreteParametric *nevent_time;
+  Forward *forward = NULL;    // only allocated for an equilibrium renewal process
+
+
+  // computation of the number of values
+
+  switch (type) {
+  case ORDINARY :
+    nb_value = time / inter_event.offset + 1;
+    break;
+  case EQUILIBRIUM :
+    nb_value = (time - 1) / inter_event.offset + 2;
+    break;
+  }
+
+  switch (type) {
+  case ORDINARY :
+    time_nb_value = time + 1;
+    break;
+  case EQUILIBRIUM :
+    time_nb_value = time;
+    break;
+  }
+
+  nevent_time = new DiscreteParametric(time + 1 , ident);
+
+  // the output pointers start on the first element and are post-incremented
+  // (no pointer is formed before the beginning of the arrays)
+
+  pmass = mass;
+  pcumul = cumul;
+
+  for (i = 0;i < nb_value;i++) {
+    if (i < nb_value - 1) {
+
+      if (i == 0) {
+        switch (type) {
+
+        // ordinary renewal process: time to the 1st event distributed according to
+        // the inter-event distribution
+
+        case ORDINARY : {
+          nevent_time->mass_copy(inter_event , time + 1);
+          break;
+        }
+
+        // equilibrium renewal process: time to the 1st event distributed according to
+        // the forward recurrence time distribution
+
+        case EQUILIBRIUM : {
+          forward = new Forward(inter_event);
+          nevent_time->mass_copy(*forward , time + 1);
+          break;
+        }
+        }
+
+        nevent_time->cumul_computation();
+      }
+
+      else {
+
+        // j: number of inter-event intervals to be summed
+
+        switch (type) {
+        case ORDINARY :
+          j = i + 1;
+          break;
+        case EQUILIBRIUM :
+          j = i;
+          break;
+        }
+
+        // computation of the time to the (n+1)th events distribution
+
+        if ((j == 1) && (ident != CATEGORICAL)) {
+          nevent_time->mass_copy(inter_event , time + 1);
+          nevent_time->cumul_computation();
+        }
+
+        else {
+          switch (ident) {
+
+          case CATEGORICAL : {
+            nevent_time->convolution(inter_event , *nevent_time , time + 1);
+            nevent_time->cumul_computation();
+            break;
+          }
+
+          case BINOMIAL : {
+            nevent_time->inf_bound = j * inter_event.inf_bound;
+            nevent_time->sup_bound = j * inter_event.sup_bound;
+            nevent_time->probability = inter_event.probability;
+            nevent_time->binomial_computation(time_nb_value , RENEWAL);
+            break;
+          }
+
+          case POISSON : {
+            nevent_time->inf_bound = j * inter_event.inf_bound;
+            nevent_time->parameter = j * inter_event.parameter;
+            nevent_time->poisson_computation(time_nb_value , RENEWAL_THRESHOLD , RENEWAL);
+            break;
+          }
+
+          case NEGATIVE_BINOMIAL : {
+            nevent_time->inf_bound = j * inter_event.inf_bound;
+            nevent_time->parameter = j * inter_event.parameter;
+            nevent_time->probability = inter_event.probability;
+            nevent_time->negative_binomial_computation(time_nb_value , RENEWAL_THRESHOLD , RENEWAL);
+            break;
+          }
+          }
+        }
+
+        // equilibrium renewal process with a parametric inter-event distribution:
+        // convolution with the forward recurrence time distribution
+
+        if ((type == EQUILIBRIUM) && (ident != CATEGORICAL)) {
+          nevent_time->convolution(*forward , *nevent_time , time + 1);
+          nevent_time->cumul_computation();
+        }
+      }
+
+      // bcumul keeps its previous value when the observation period lies
+      // beyond the values computed for nevent_time
+
+      if (time < nevent_time->nb_value) {
+        bcumul = MIN(nevent_time->cumul[time] , 1.);
+      }
+      *pmass++ = previous_cumul - bcumul;
+      *pcumul++ = 1. - bcumul;
+      previous_cumul = bcumul;
+    }
+
+    // last value: remaining probability mass
+
+    else {
+      *pmass++ = previous_cumul;
+      *pcumul++ = 1.;
+    }
+  }
+
+  delete nevent_time;
+  delete forward;    // NULL (no-op) unless an equilibrium process allocated it
+
+  offset_computation();
+  max_computation();
+  mean_computation();
+  variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Fast computation (O(n)) in the case of an ordinary renewal process of
+ *         the number of events distribution from a binomial inter-event distribution.
+ */
+/*--------------------------------------------------------------*/
+
+void NbEvent::binomial_computation()
+
+{
+  int i , j;
+  int main_set , main_subset , inf_bound_set , inf_bound_subset ,
+      rapid_index , nb_term;
+  double failure = 1. - probability , success = probability , k_success , main_term ,
+         inf_bound_term , sum , scale , *pmass , *pcumul;
+
+
+  // NOTE(review): this divides by inf_bound although a branch below tests
+  // inf_bound == 0 - confirm that callers guarantee inf_bound >= 1
+
+  nb_value = time / inf_bound + 1;
+
+  pmass = mass;
+  pcumul = cumul;
+
+  // computation of the success probability at the power (upper bound - lower bound)
+
+  k_success = 1.;
+  for (j = 0;j < sup_bound - inf_bound;j++) {
+    k_success *= success;
+  }
+  main_term = k_success;
+
+  // values of null probability such that (observation period + 1) > (i + 1) * (upper bound)
+
+  i = 0;
+  while (time + 1 > (i + 1) * sup_bound) {
+    main_term *= k_success;
+    *pmass++ = 0.;
+    *pcumul++ = 0.;
+    i++;
+  }
+
+  // computation of the first non-null probability value (exhaustive computation)
+
+  main_subset = (i + 1) * (sup_bound - inf_bound);
+  main_set = main_subset;
+  sum = main_term;
+
+  nb_term = (i + 1) * sup_bound - time - 1;
+  if ((i == nb_value - 1) && ((i + 1) * inf_bound > time + 1)) {
+    nb_term = (i + 1) * (sup_bound - inf_bound);
+  }
+
+  // each term is deduced from the previous one by a multiplicative update
+
+  for (j = 0;j < nb_term;j++) {
+    scale = (double)main_subset / (double)(main_set - (main_subset - 1));
+    main_subset--;
+    main_term *= scale * failure / success;
+    sum += main_term;
+  }
+  *pmass = sum;
+  *pcumul = sum;
+  i++;
+  rapid_index = i;    // first value handled by the fast recurrence below
+
+  // fast computation of the probability masses for the following values
+
+  while (i < nb_value) {
+
+    // computation of the main terms
+
+    // computation of the 1st term
+
+    if (i > rapid_index) {
+      if (inf_bound == 0) {
+        main_set++;
+        scale = (double)main_set / (double)(main_set - main_subset);
+        main_term *= scale * failure;
+      }
+      else {
+
+        // restart from the state saved at the previous value (lower bound terms)
+
+        main_subset = inf_bound_subset;
+        main_set = inf_bound_set;
+        main_term = inf_bound_term;
+      }
+    }
+    if ((i == rapid_index) || (inf_bound > 0)) {
+      scale = (double)main_subset / (double)(main_set - (main_subset - 1));
+      main_subset--;
+      main_term *= scale * failure;
+    }
+    *++pmass = main_term;
+
+    // computation of the (j - lower bound - 1) following terms
+
+    for (j = 1;j <= sup_bound - inf_bound - 1;j++) {
+      main_set++;
+      scale = (double)main_set / (double)(main_set - main_subset);
+      main_term *= scale * failure;
+      *pmass += main_term;
+    }
+
+    // computation of the terms corresponding to the lower bound
+
+    // computation of the 1st term
+
+    if (inf_bound > 0) {
+      inf_bound_subset = main_subset;
+      inf_bound_set = main_set + 1;
+      scale = (double)inf_bound_set / (double)(inf_bound_set - inf_bound_subset);
+      inf_bound_term = main_term * scale * failure / success;
+      *pmass += inf_bound_term;
+    }
+
+    // computation of the (lower bound - 1) following terms
+
+    if (inf_bound > 1) {
+      nb_term = inf_bound - 1;
+      if ((i == nb_value - 1) && ((i + 1) * inf_bound > time + 1)) {
+        nb_term = time - i * inf_bound;
+      }
+      for (j = 0;j < nb_term;j++) {
+        scale = (double)inf_bound_subset / (double)(inf_bound_set - (inf_bound_subset - 1));
+        inf_bound_subset--;
+        inf_bound_term *= scale * failure / success;
+        *pmass += inf_bound_term;
+      }
+    }
+
+    // update of the cumulative distribution function
+
+    pcumul++;
+    *pcumul = *(pcumul - 1) + *pmass;
+    i++;
+  }
+
+  offset_computation();
+  max_computation();
+  mean_computation();
+  variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Fast computation (O(n)) in the case of an ordinary renewal process of
+ *         the number of events distribution from a negative binomial inter-event distribution.
+ *         This computation requires an integer-valued negative binomial shape parameter.
+ */
+/*--------------------------------------------------------------*/
+
+void NbEvent::negative_binomial_computation()
+
+{
+  int i , j;
+  int main_set , main_subset , inf_bound_set , inf_bound_subset;
+  double failure = 1. - probability , success = probability , main_term , inf_bound_term ,
+         scale , *pmass;
+
+
+  // division by inf_bound: the only caller visible in this file
+  // (ordinary_computation) checks inf_bound > 0 beforehand
+
+  nb_value = time / inf_bound + 1;
+
+  pmass = mass;
+
+  // computation of the terms for number of events = 0
+
+  main_term = 1.;
+  for (i = 0;i < parameter;i++) {
+    main_term *= success;
+  }
+  *pmass = main_term;
+
+  main_subset = (int)parameter - 1;
+  main_set = main_subset;
+  for (i = inf_bound + 1;i <= time;i++) {
+    main_set++;
+    scale = (double)main_set / (double)(main_set - main_subset);
+    main_term *= scale * failure;
+    *pmass += main_term;
+  }
+
+  // the accumulated sum is complemented to 1
+
+  *pmass = 1. - *pmass;
+  pmass++;
+
+  // computation of probability masses for values from 1 to (observation period / lower bound)
+
+  for (i = 1;i < nb_value;i++) {
+    *pmass = 0.;
+
+    // computation of the terms corresponding to the lower bound
+
+    // computation of the 1st term
+
+    if (inf_bound > 1) {
+      if (i == 1) {
+        inf_bound_subset = main_subset;
+        inf_bound_set = main_set;
+        inf_bound_term = main_term;
+      }
+      else {
+        scale = (double)(main_set - main_subset) / (double)main_set;
+        inf_bound_subset = main_subset;
+        inf_bound_set = main_set - 1;
+        inf_bound_term = main_term * scale * success / failure;
+      }
+      *pmass += inf_bound_term;
+
+      // computation of the (lower bound - 2) following terms
+
+      if (inf_bound > 2) {
+        for (j = 0;j < inf_bound - 2;j++) {
+          scale = (double)(inf_bound_set - inf_bound_subset) / (double)inf_bound_set;
+          inf_bound_set--;
+          inf_bound_term *= scale / failure;
+          *pmass += inf_bound_term;
+        }
+      }
+    }
+
+    // computation of the main terms
+
+    // computation of the 1st term
+
+    if (i == 1) {
+      main_subset++;
+      main_set++;
+      scale = (double)main_set / (double)main_subset;
+      main_term *= scale * failure;
+    }
+    else {
+      scale = (double)(main_set - main_subset) / (double)(main_subset + 1);
+      main_subset++;
+      main_term *= scale * success;
+    }
+    if (inf_bound > 1) {
+      for (j = 0;j < inf_bound - 1;j++) {
+        scale = (double)(main_set - main_subset) / (double)main_set;
+        main_set--;
+        main_term *= scale / failure;
+      }
+    }
+    if (parameter == 1) {
+      main_term /= failure;
+    }
+    *pmass += main_term;
+
+    // computation of the (k - 1) following terms
+
+    if (parameter > 1) {
+      for (j = 1;j <= (int)parameter - 1;j++) {
+        main_subset++;
+        main_set++;
+        scale = (double)main_set / (double)main_subset;
+        main_term *= scale * success;
+        if (j == parameter - 1) {
+          main_term /= failure;
+        }
+        *pmass += main_term;
+      }
+    }
+
+    pmass++;
+  }
+
+  offset_computation();
+  cumul_computation();
+
+  max_computation();
+  mean_computation();
+  variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation in the case of an ordinary renewal process of
+ *         the number of events distribution from a parametric inter-event
+ *         distribution (binomial, Poisson, negative binomial).
+ *
+ *  Nothing is computed when the process type is not ORDINARY.
+ *  The fast algebraic computations are selected on the basis of threshold tests
+ *  (RB_THRESHOLD, RNB_THRESHOLD); the generic computation is used otherwise.
+ *
+ *  \param[in] inter_event reference on an inter-event distribution.
+ */
+/*--------------------------------------------------------------*/
+
+void NbEvent::ordinary_computation(DiscreteParametric &inter_event)
+
+{
+  if (type == ORDINARY) {
+    if ((ident == BINOMIAL) &&
+        (time * (1. - probability) / (probability * sqrt((double)(MAX(1 , inf_bound)))) < RB_THRESHOLD)) {
+      binomial_computation();
+    }
+
+    // the fast negative binomial computation requires an integer-valued shape parameter
+    // and a strictly positive lower bound
+
+    else if ((ident == NEGATIVE_BINOMIAL) && (parameter == (int)parameter) && (inf_bound > 0) &&
+             (time - (time / inf_bound + 1) * inf_bound + 1 >= 0) &&
+             (time * probability / ((1. - probability) * sqrt((double)(MAX(1 , inf_bound)))) < RNB_THRESHOLD)) {
+
+#     ifdef DEBUG
+      cout << "algebric calculation" << endl;
+#     endif
+
+      negative_binomial_computation();
+    }
+
+    else {
+      computation(inter_event);
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of non-event/event probabilities as a function of time.
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::index_event_computation()
+
+{
+  int t , lag , max_lag;
+  double no_event_sum , event_sum , previous_event;
+
+
+  // initial conditions at time 0, depending on the process type
+
+  if (type == ORDINARY) {
+    index_event->offset = 0;
+
+    index_event->point[0][0] = 0.;
+    index_event->point[1][0] = 1.;
+  }
+
+  else if (type == EQUILIBRIUM) {
+    index_event->offset = 1;
+
+    index_event->point[0][0] = 0.;
+    index_event->point[1][0] = 0.;
+  }
+
+  // recurrence on time: the probabilities at time t are built from
+  // the event probabilities at the previous times t - lag
+
+  for (t = 1;t < index_event->length;t++) {
+    no_event_sum = 0.;
+    event_sum = 0.;
+    max_lag = MIN(t , inter_event->nb_value - 1);
+
+    for (lag = 1;lag <= max_lag;lag++) {
+
+      // previous event at time t - lag > 0
+
+      if (lag < t) {
+        previous_event = index_event->point[1][t - lag];
+        no_event_sum += (1. - inter_event->cumul[lag]) * previous_event;
+        event_sum += inter_event->mass[lag] * previous_event;
+      }
+
+      // no previous event: time to the 1st event
+
+      else if (type == ORDINARY) {
+        no_event_sum += 1. - inter_event->cumul[lag];
+        event_sum += inter_event->mass[lag];
+      }
+
+      else if (type == EQUILIBRIUM) {
+        no_event_sum += (1. - forward->cumul[lag]);
+        event_sum += forward->mass[lag];
+      }
+    }
+
+    index_event->point[0][t] = no_event_sum;
+    index_event->point[1][t] = event_sum;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the distributions of a renewal process from
+ *         the inter-event distribution.
+ *         time interval: length-biased distribution,
+ *         backward and forward recurrence time distributions,
+ *         count: number of events distributions and resulting mixture,
+ *         intensity: no-event/event probabilities as a function of time.
+ *
+ *  \param[in] inter_event_flag flag for the computation of the inter-event distribution,
+ *  \param[in] itype            renewal process type (ORDINARY/EQUILIBRIUM),
+ *  \param[in] dtime            pointer on the observation period distribution.
+ */
+/*--------------------------------------------------------------*/
+
+void Renewal::computation(bool inter_event_flag , process_type itype , const Distribution *dtime)
+
+{
+  int i , j;
+  int nb_value , time_nb_value;
+  double sum , *tmass , *pmass , *cumul , *previous_cumul , *pcumul1 , *pcumul2;
+  DiscreteParametric *pnevent_time , *power , *forward_power;
+
+
+  // DEFAULT_TYPE: the current process type is kept
+
+  if (itype == DEFAULT_TYPE) {
+    itype = type;
+  }
+
+  // computation of the inter-event distribution, the length-biased distribution and
+  // the forward recurrence time distribution
+
+  if (inter_event_flag) {
+    inter_event->computation(1 , RENEWAL_THRESHOLD);
+    length_bias->computation(*inter_event);
+    forward->computation(*inter_event);
+  }
+
+  // change of process type or of observation period distribution:
+  // the dependent objects are destroyed and rebuilt
+
+  if ((itype != type) || ((dtime) && (*dtime != *time))) {
+    type_init(itype);
+
+    tmass = time->mass + time->offset;
+    for (i = time->offset;i < time->nb_value;i++) {
+      if (*tmass++ > 0.) {
+        delete nb_event[i];
+      }
+    }
+
+    delete mixture;
+
+    // NOTE(review): nevent_time, nb_event and index_event are only reallocated
+    // (and nb_event_max only recomputed) in the branch below, i.e. when
+    // the observation period distribution changes - confirm that a change of
+    // process type alone cannot require a larger nevent_time array
+    // (unless type_init() takes care of it)
+
+    if ((dtime) && (*dtime != *time)) {
+      delete time;
+
+      for (i = 1;i <= nb_event_max;i++) {
+        delete nevent_time[i];
+      }
+      delete [] nevent_time;
+
+      delete [] nb_event;
+
+      delete index_event;
+
+      time = new Distribution(*dtime);
+
+      switch (type) {
+      case ORDINARY :
+        nb_event_max = (time->nb_value - 1) / inter_event->offset;
+        break;
+      case EQUILIBRIUM :
+        nb_event_max = (time->nb_value - 2) / inter_event->offset + 1;
+        break;
+      }
+
+      nevent_time = new DiscreteParametric*[nb_event_max + 1];
+      for (i = 0;i <= nb_event_max;i++) {
+        nevent_time[i] = NULL;
+      }
+
+      nb_event = new NbEvent*[time->nb_value];
+
+      index_event = new Curves(2 , time->nb_value);
+    }
+
+    for (i = 0;i < time->offset;i++) {
+      nb_event[i] = NULL;
+    }
+
+    // one number of events distribution per observation period of positive mass
+
+    tmass = time->mass + time->offset;
+    for (i = time->offset;i < time->nb_value;i++) {
+      if (*tmass++ > 0.) {
+        switch (type) {
+        case ORDINARY :
+          nb_value = i / inter_event->offset + 1;
+          break;
+        case EQUILIBRIUM :
+          nb_value = (i - 1) / inter_event->offset + 2;
+          break;
+        }
+
+        nb_event[i] = new NbEvent(type , i , nb_value , inter_event->ident);
+      }
+
+      else {
+        nb_event[i] = NULL;
+      }
+    }
+
+    init(inter_event->inf_bound , inter_event->sup_bound ,
+         inter_event->parameter , inter_event->probability);
+
+    // nb_value holds the value computed for the last observation period of positive mass
+    // NOTE(review): nb_value is not set if no observation period has a positive mass
+
+    mixture = new Distribution(nb_value);
+  }
+
+  // computation of the backward recurrence time distribution
+
+  backward->computation(*inter_event , *time);
+
+  // construction and initialization of the cumulative distribution functions
+  // (only the entries corresponding to observation periods of positive mass are
+  // initialized and subsequently used)
+
+  cumul = new double[time->nb_value];
+  previous_cumul = new double[time->nb_value];
+
+  tmass = time->mass + time->offset;
+  pcumul1 = cumul + time->offset;
+  pcumul2 = previous_cumul + time->offset;
+
+  for (i = time->offset;i < time->nb_value;i++) {
+    if (*tmass++ > 0.) {
+      *pcumul1 = 1.;
+      *pcumul2 = 1.;
+    }
+    pcumul1++;
+    pcumul2++;
+  }
+
+  // computation of the number of values of the number of events distributions and the resulting mixture
+
+  tmass = time->mass + time->offset;
+
+  for (i = time->offset;i < time->nb_value;i++) {
+    if (*tmass++ > 0.) {
+      switch (type) {
+      case ORDINARY :
+        nb_event[i]->nb_value = i / inter_event->offset + 1;
+        break;
+      case EQUILIBRIUM :
+        nb_event[i]->nb_value = (i - 1) / inter_event->offset + 2;
+        break;
+      }
+    }
+  }
+
+  // assumes that the largest observation period has a positive mass
+  // (nb_event[time->nb_value - 1] is dereferenced without test) - TODO confirm
+
+  mixture->nb_value = nb_event[time->nb_value - 1]->nb_value;
+
+  switch (type) {
+  case ORDINARY :
+    time_nb_value = time->nb_value;
+    break;
+  case EQUILIBRIUM :
+    time_nb_value = time->nb_value - 1;
+    break;
+  }
+
+  // equilibrium renewal process: the successive convolution powers of the inter-event
+  // distribution are computed in a temporary object, nevent_time[] receiving
+  // their convolution with the forward recurrence time distribution
+
+  if (type == EQUILIBRIUM) {
+    pnevent_time = new DiscreteParametric(time->nb_value , inter_event->ident);
+    power = pnevent_time;
+  }
+
+  // computation of the number of events distributions and the resulting mixture
+
+  pmass = mixture->mass;
+
+  for (i = 0;i < mixture->nb_value;i++) {
+    if (i < mixture->nb_value - 1) {
+      j = i + 1;
+
+      if (!nevent_time[j]) {
+        nevent_time[j] = new DiscreteParametric(time->nb_value , inter_event->ident);
+      }
+
+      switch (type) {
+      case ORDINARY :
+        power = nevent_time[j];
+        break;
+      case EQUILIBRIUM :
+        forward_power = nevent_time[j];
+        break;
+      }
+
+      if (i == 0) {
+        if (type == EQUILIBRIUM) {
+          forward_power->mass_copy(*forward , time->nb_value);
+          forward_power->cumul_computation();
+        }
+
+        power->mass_copy(*inter_event , time->nb_value);
+        power->cumul_computation();
+      }
+
+      else {
+
+        // at this point, power still holds the time to the ith event distribution
+
+        if (type == EQUILIBRIUM) {
+          forward_power->convolution(*forward , *power , time->nb_value);
+          forward_power->cumul_computation();
+        }
+
+        // computation of the time to the (n+1)th event distribution
+
+        power->ident = inter_event->ident;
+
+        switch (inter_event->ident) {
+
+        case CATEGORICAL : {
+          switch (type) {
+          case ORDINARY :
+            power->convolution(*inter_event , *nevent_time[j - 1] , time_nb_value);
+            break;
+          case EQUILIBRIUM :
+            power->convolution(*inter_event , *pnevent_time , time_nb_value);
+            break;
+          }
+
+          power->cumul_computation();
+          break;
+        }
+
+        case BINOMIAL : {
+          power->inf_bound = j * inter_event->inf_bound;
+          power->sup_bound = j * inter_event->sup_bound;
+          power->probability = inter_event->probability;
+          power->binomial_computation(time_nb_value , RENEWAL);
+          break;
+        }
+
+        case POISSON : {
+          power->inf_bound = j * inter_event->inf_bound;
+          power->parameter = j * inter_event->parameter;
+          power->poisson_computation(time_nb_value , RENEWAL_THRESHOLD , RENEWAL);
+          break;
+        }
+
+        case NEGATIVE_BINOMIAL : {
+          power->inf_bound = j * inter_event->inf_bound;
+          power->parameter = j * inter_event->parameter;
+          power->probability = inter_event->probability;
+          power->negative_binomial_computation(time_nb_value , RENEWAL_THRESHOLD , RENEWAL);
+          break;
+        }
+        }
+      }
+    }
+
+    // computation of the number of events distributions and the resulting mixture
+    // (pcumul1: current cumulative value, pcumul2: value kept from the previous number of events)
+
+    tmass = time->mass + time->offset;
+    pcumul1 = cumul + time->offset;
+    pcumul2 = previous_cumul + time->offset;
+    sum = 0.;
+
+    for (j = time->offset;j < time->nb_value;j++) {
+      if (*tmass > 0.) {
+
+        // last value of the distribution for the observation period j
+
+        if (i == nb_event[j]->nb_value - 1) {
+          *pcumul1 = 0.;
+        }
+
+        else {
+          switch (type) {
+
+          case ORDINARY : {
+            if (j < power->nb_value) {
+              *pcumul1 = MIN(power->cumul[j] , 1.);
+            }
+            break;
+          }
+
+          case EQUILIBRIUM : {
+            if (j < forward_power->nb_value) {
+              *pcumul1 = MIN(forward_power->cumul[j] , 1.);
+            }
+            break;
+          }
+          }
+        }
+
+        if (i < nb_event[j]->nb_value) {
+          nb_event[j]->mass[i] = *pcumul2 - *pcumul1;
+          nb_event[j]->cumul[i] = 1. - *pcumul1;
+          *pcumul2 = *pcumul1;
+          sum += *tmass * nb_event[j]->mass[i];
+        }
+      }
+
+      tmass++;
+      pcumul1++;
+      pcumul2++;
+    }
+
+    // mixture mass: number of events masses weighted by the observation period masses
+
+    *pmass++ = sum;
+  }
+
+  for (i = 1;i < mixture->nb_value;i++) {
+    nevent_time[i]->max_computation();
+  }
+
+  tmass = time->mass + time->offset;
+
+  for (i = time->offset;i < time->nb_value;i++) {
+    if (*tmass++ > 0.) {
+      nb_event[i]->offset_computation();
+      nb_event[i]->max_computation();
+      nb_event[i]->mean_computation();
+      nb_event[i]->variance_computation();
+    }
+  }
+
+  mixture->offset_computation();
+  mixture->cumul_computation();
+
+  mixture->max_computation();
+  mixture->mean_computation();
+  mixture->variance_computation();
+
+  if (type == EQUILIBRIUM) {
+    delete pnevent_time;
+  }
+
+  delete [] cumul;
+  delete [] previous_cumul;
+
+  index_event_computation();
+}
+
+
+};    // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/semi_markov.cpp b/src/cpp/sequence_analysis/semi_markov.cpp
new file mode 100644
index 0000000..3ab07f4
--- /dev/null
+++ b/src/cpp/sequence_analysis/semi_markov.cpp
@@ -0,0 +1,3644 @@
+/* -*-c++-*-
+ *  ----------------------------------------------------------------------------
+ *
+ *       StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ *       Copyright 1995-2018 CIRAD AGAP
+ *
+ *       File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ *       $Source$
+ *       $Id$
+ *
+ *       Forum for StructureAnalysis developers:
+ *
+ *  ----------------------------------------------------------------------------
+ *
+ *                      GNU General Public Licence
+ *
+ *       This program is free software; you can redistribute it and/or
+ *       modify it under the terms of the GNU General Public License as
+ *       published by the Free Software Foundation; either version 2 of
+ *       the License, or (at your option) any later version.
+ *
+ *       This program is distributed in the hope that it will be useful,
+ *       but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *       MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the
+ *       GNU General Public License for more details.
+ *
+ *       You should have received a copy of the GNU General Public
+ *       License along with this program; see the file COPYING. If not,
+ *       write to the Free Software Foundation, Inc., 59
+ *       Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *  ----------------------------------------------------------------------------
+ */
+
+
+
+// NOTE(review): the header names of the seven bare #include directives below
+// are missing in this copy of the patch - restore them from the upstream file
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "stat_tool/stat_label.h"
+
+#include "semi_markov.h"
+#include "sequence_label.h"
+
+using namespace std;
+using namespace boost;
+using namespace stat_tool;
+
+
+namespace sequence_analysis {
+
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Default constructor of the SemiMarkovChain class.
+ *
+ *  All the pointer data members are set to NULL.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovChain::SemiMarkovChain()
+
+{
+  sojourn_type = NULL;
+  state_process = NULL;
+  forward = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the SemiMarkovChain class.
+ *
+ *  Only the state process is allocated; sojourn_type and forward are left NULL.
+ *
+ *  \param[in] itype     process type (ORDINARY/EQUILIBRIUM),
+ *  \param[in] inb_state number of states.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovChain::SemiMarkovChain(process_type itype , int inb_state)
+:Chain(itype , inb_state)
+
+{
+  sojourn_type = NULL;
+  state_process = new CategoricalSequenceProcess(nb_state , nb_state , false);
+  forward = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the SemiMarkovChain class.
+ *
+ *  \param[in] pchain     pointer on a Chain object,
+ *  \param[in] poccupancy pointer on a CategoricalSequenceProcess object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovChain::SemiMarkovChain(const Chain *pchain , const CategoricalSequenceProcess *poccupancy)
+:Chain(*pchain),
+ sojourn_type(NULL),
+ state_process(NULL),
+ forward(NULL)
+
+{
+  int i;
+
+  sojourn_type = new state_sojourn_type[nb_state];
+
+  // absorption probability: 1 for a state with a self-transition probability of 1, 0 otherwise
+
+  state_process = new CategoricalSequenceProcess(*poccupancy);
+  for (i = 0;i < nb_state;i++) {
+    if (transition[i][i] < 1.) {
+      state_process->absorption[i] = 0.;
+    }
+    else {
+      state_process->absorption[i] = 1.;
+    }
+  }
+
+# ifdef DEBUG
+  assert(forward == NULL);
+# endif
+  forward = new Forward*[nb_state];
+
+  // a state is semi-Markovian if an occupancy distribution is defined for it;
+  // a forward recurrence time distribution is built for the recurrent semi-Markovian states only
+
+  for (i = 0;i < nb_state;i++) {
+    sojourn_type[i] = (state_process->sojourn_time[i] ? SEMI_MARKOVIAN : MARKOVIAN);
+
+    if ((sojourn_type[i] == SEMI_MARKOVIAN) && (stype[i] == RECURRENT)) {
+      forward[i] = new Forward(*(state_process->sojourn_time[i]));
+    }
+    else {
+      forward[i] = NULL;
+    }
+  }
+
+  // equilibrium process: uniform initialization before the computation of
+  // the initial probabilities
+
+  if (type == EQUILIBRIUM) {
+    for (i = 0;i < nb_state;i++) {
+      initial[i] = 1. / (double)nb_state;
+    }
+    initial_probability_computation();
+  }
+}
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy constructor of the SemiMarkovChain class.
+ *
+ *  \param[in] smarkov reference on a SemiMarkovChain object,
+ *  \param[in] param   parameter (forwarded to copy()).
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovChain::SemiMarkovChain(const SemiMarkovChain &smarkov , int param)
+: Chain(smarkov),
+  sojourn_type(NULL),
+  state_process(NULL),
+  forward(NULL)
+{ copy(smarkov , param); }
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a SemiMarkovChain object.
+ *
+ *  \param[in] smarkov reference on a SemiMarkovChain object,
+ *  \param[in] param   parameter (if > 0: number of allocated values for the state occupancy distributions).
+ *
+ *  The data members that are NULL in the source object (e.g. for an object built
+ *  by the default constructor) are left NULL in the copy.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::copy(const SemiMarkovChain &smarkov , int param)
+
+{
+  int i;
+
+# ifdef DEBUG
+  assert(sojourn_type == NULL);
+# endif
+
+  // state sojourn types
+
+  if (smarkov.sojourn_type) {
+    sojourn_type = new state_sojourn_type[nb_state];
+    for (i = 0;i < nb_state;i++) {
+      sojourn_type[i] = smarkov.sojourn_type[i];
+    }
+  }
+  else {
+    sojourn_type = NULL;
+  }
+
+  // forward recurrence time distributions
+
+  if (smarkov.forward) {
+    forward = new Forward*[nb_state];
+
+    for (i = 0;i < nb_state;i++) {
+      if (smarkov.forward[i]) {
+        forward[i] = new Forward(*(smarkov.forward[i]) , param);
+      }
+      else {
+        forward[i] = NULL;
+      }
+    }
+  }
+  else {
+    forward = NULL;
+  }
+
+  // state process: plain copy (I_DEFAULT) or copy with initialization of
+  // the occupancy distributions (0: default allocation, otherwise param allocated values)
+
+  if (smarkov.state_process) {
+    switch (param) {
+    case I_DEFAULT :
+      state_process = new CategoricalSequenceProcess(*(smarkov.state_process));
+      break;
+    case 0 :
+      state_process = new CategoricalSequenceProcess(*(smarkov.state_process) , INIT_OCCUPANCY , I_DEFAULT);
+      break;
+    default :
+      state_process = new CategoricalSequenceProcess(*(smarkov.state_process) , INIT_OCCUPANCY , param);
+      break;
+    }
+  }
+  else {
+    state_process = NULL;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destruction of the data members of a SemiMarkovChain object.
+ *
+ *  The pointers are reset to NULL so that the function can be called again safely.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::remove()
+
+{
+  int i;
+
+
+  if (sojourn_type != NULL)
+    delete [] sojourn_type;
+
+  sojourn_type = NULL;
+
+  if (state_process != NULL)
+    delete state_process;
+  state_process = NULL;
+
+  if (forward != NULL) {
+    for (i = 0;i < nb_state;i++) {
+      delete forward[i];
+      forward[i] = NULL;
+    }
+    delete [] forward;
+    forward = NULL;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the SemiMarkovChain class.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovChain::~SemiMarkovChain()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the SemiMarkovChain class.
+ *
+ *  \param[in] smarkov reference on a SemiMarkovChain object.
+ *
+ *  \return SemiMarkovChain object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovChain& SemiMarkovChain::operator=(const SemiMarkovChain &smarkov)
+
+{
+  // self-assignment: nothing to do
+
+  if (this == &smarkov) {
+    return *this;
+  }
+
+  // release of the current content (derived part first, then base part)
+
+  remove();
+  Chain::remove();
+
+  // copy of the source object (base part first, then derived part)
+
+  Chain::copy(smarkov);
+  copy(smarkov);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Number of parameters of a SemiMarkovChain object: parameters of
+ *         the underlying chain plus parameters of the occupancy distributions of
+ *         the semi-Markovian states.
+ *
+ *  \param[in] min_probability minimum probability.
+ *
+ *  \return number of parameters.
+ */
+/*--------------------------------------------------------------*/
+
+int SemiMarkovChain::nb_parameter_computation(double min_probability) const
+
+{
+  int state;
+  int nb_parameter;
+
+
+  nb_parameter = Chain::nb_parameter_computation(min_probability);
+
+  for (state = 0;state < nb_state;state++) {
+    if (sojourn_type[state] != SEMI_MARKOVIAN) {
+      continue;
+    }
+
+    nb_parameter += state_process->sojourn_time[state]->nb_parameter_computation();
+
+    // one parameter less when the lower bound of the occupancy distribution is 1
+
+    if (state_process->sojourn_time[state]->inf_bound == 1) {
+      nb_parameter--;
+    }
+  }
+
+  return nb_parameter;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Default constructor of the SemiMarkov class: no iterator, no data,
+ *         no observation process.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkov::SemiMarkov()
+
+{
+  nb_iterator = 0;
+  nb_output_process = 0;
+
+  semi_markov_data = NULL;
+
+  categorical_process = NULL;
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the SemiMarkov class.
+ *
+ *  \param[in] itype              process type (ORDINARY/EQUILIBRIUM),
+ *  \param[in] inb_state          number of states,
+ *  \param[in] inb_output_process number of observation processes,
+ *  \param[in] nb_value           number of observed values for each observation process.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkov::SemiMarkov(process_type itype , int inb_state , int inb_output_process , int *nb_value)
+:SemiMarkovChain(itype , inb_state)
+
+{
+  int process;
+
+
+  nb_iterator = 0;
+  semi_markov_data = NULL;
+
+  nb_output_process = inb_output_process;
+
+  categorical_process = new CategoricalSequenceProcess*[nb_output_process];
+  discrete_parametric_process = new DiscreteParametricProcess*[nb_output_process];
+  continuous_parametric_process = new ContinuousParametricProcess*[nb_output_process];
+
+  // exactly one of the three process types is built for each observation process,
+  // the two other entries being NULL
+
+  for (process = 0;process < nb_output_process;process++) {
+    categorical_process[process] = NULL;
+    discrete_parametric_process[process] = NULL;
+    continuous_parametric_process[process] = NULL;
+
+    // I_DEFAULT: continuous observation process
+
+    if (nb_value[process] == I_DEFAULT) {
+      continuous_parametric_process[process] = new ContinuousParametricProcess(nb_state);
+    }
+
+    // at most NB_OUTPUT values: categorical observation process
+
+    else if (nb_value[process] <= NB_OUTPUT) {
+      categorical_process[process] = new CategoricalSequenceProcess(nb_state , nb_value[process] , true);
+    }
+
+    // otherwise: discrete parametric observation process
+
+    else {
+      discrete_parametric_process[process] = new DiscreteParametricProcess(nb_state , (int)(nb_value[process] * SAMPLE_NB_VALUE_COEFF));
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the SemiMarkov class.
+ *
+ *  \param[in] pchain        pointer on a Chain object,
+ *  \param[in] poccupancy    pointer on a CategoricalSequenceProcess object,
+ *  \param[in] pobservation  pointer on a CategoricalProcess object,
+ *  \param[in] length        sequence length,
+ *  \param[in] counting_flag flag on the computation of the counting distributions.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkov::SemiMarkov(const Chain *pchain , const CategoricalSequenceProcess *poccupancy ,
+                       const CategoricalProcess *pobservation ,
+                       int length , bool counting_flag)
+:SemiMarkovChain(pchain , poccupancy)
+
+{
+  nb_iterator = 0;
+  semi_markov_data = NULL;
+
+  // a single categorical observation process, or none if pobservation is NULL
+
+  nb_output_process = (pobservation ? 1 : 0);
+
+  if (nb_output_process == 1) {
+    categorical_process = new CategoricalSequenceProcess*[nb_output_process];
+    categorical_process[0] = new CategoricalSequenceProcess(*pobservation);
+  }
+  else {
+    categorical_process = NULL;
+  }
+
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+
+  // the counting distributions are not computed beyond COUNTING_MAX_LENGTH
+
+  if (length > COUNTING_MAX_LENGTH) {
+    counting_flag = false;
+  }
+  characteristic_computation(length , counting_flag);
+}
+
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy constructor of the SemiMarkov class.
+ *
+ *  \param[in] smarkov   reference on a SemiMarkov object,
+ *  \param[in] data_flag flag copy of the included SemiMarkovData object,
+ *  \param[in] param     parameter.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkov::SemiMarkov(const SemiMarkov &smarkov ,
+                       bool data_flag,
+                       int param)
+: SemiMarkovChain(smarkov , param),    // base class listed first: it is initialized first anyway
+  nb_iterator(0),
+  semi_markov_data(NULL),
+  nb_output_process(0),
+  categorical_process(NULL),
+  discrete_parametric_process(NULL),
+  continuous_parametric_process(NULL)
+{ copy(smarkov , data_flag , param); }
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a SemiMarkov object.
+ *
+ *  \param[in] smarkov   reference on a SemiMarkov object,
+ *  \param[in] data_flag flag copy of the included SemiMarkovData object,
+ *  \param[in] param     parameter.
+ */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::copy(const SemiMarkov &smarkov , bool data_flag , int param) + +{ + int i; + + + nb_iterator = 0; + + if ((data_flag) && (smarkov.semi_markov_data)) { + semi_markov_data = new SemiMarkovData(*(smarkov.semi_markov_data) , false); + } + else { + semi_markov_data = NULL; + } + + nb_output_process = smarkov.nb_output_process; + + if (smarkov.categorical_process) { + categorical_process = new CategoricalSequenceProcess*[nb_output_process]; + + switch (param) { + + case I_DEFAULT : { + for (i = 0;i < nb_output_process;i++) { + if (smarkov.categorical_process[i]) { + categorical_process[i] = new CategoricalSequenceProcess(*(smarkov.categorical_process[i])); + } + else { + categorical_process[i] = NULL; + } + } + break; + } + + default : { + for (i = 0;i < nb_output_process;i++) { + if (smarkov.categorical_process[i]) { + categorical_process[i] = new CategoricalSequenceProcess(*(smarkov.categorical_process[i]) , + CATEGORICAL_SEQUENCE_PROCESS_COPY , false); + } + else { + categorical_process[i] = NULL; + } + } + break; + } + } + } + + else { + categorical_process = NULL; + } + + if (smarkov.discrete_parametric_process) { + discrete_parametric_process = new DiscreteParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + if (smarkov.discrete_parametric_process[i]) { + discrete_parametric_process[i] = new DiscreteParametricProcess(*(smarkov.discrete_parametric_process[i])); + } + else { + discrete_parametric_process[i] = NULL; + } + } + } + + else { + discrete_parametric_process = NULL; + } + + if (smarkov.continuous_parametric_process) { + continuous_parametric_process = new ContinuousParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + if (smarkov.continuous_parametric_process[i]) { + continuous_parametric_process[i] = new ContinuousParametricProcess(*(smarkov.continuous_parametric_process[i])); + } + else { + 
continuous_parametric_process[i] = NULL; + } + } + } + + else { + continuous_parametric_process = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of the data members of a SemiMarkov object. + */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::remove() + +{ + int i; + + if (semi_markov_data != NULL){ + delete semi_markov_data; + semi_markov_data = NULL; + } + + + if (categorical_process != NULL) { + for (i = 0;i < nb_output_process;i++) { + if (categorical_process[i] != NULL) { + delete categorical_process[i]; + categorical_process[i] = NULL; + } + } + delete [] categorical_process; + categorical_process = NULL; + } + + if (discrete_parametric_process != NULL) { + for (i = 0;i < nb_output_process;i++) { + if (discrete_parametric_process[i] != NULL) { + delete discrete_parametric_process[i]; + discrete_parametric_process[i] = NULL; + } + } + delete [] discrete_parametric_process; + discrete_parametric_process = NULL; + } + + if (continuous_parametric_process != NULL) { + for (i = 0;i < nb_output_process;i++) { + if (continuous_parametric_process[i] != NULL) { + delete continuous_parametric_process[i]; + continuous_parametric_process[i] = NULL; + } + } + delete [] continuous_parametric_process; + continuous_parametric_process = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the SemiMarkov class. + */ +/*--------------------------------------------------------------*/ + +SemiMarkov::~SemiMarkov() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of a SemiMarkov object taking account of + * the number of iterators pointing to it. 
+ */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::conditional_delete() + +{ + if (nb_iterator == 0) { + delete this; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the SemiMarkov class. + * + * \param[in] smarkov reference on a SemiMarkov object. + * + * \return SemiMarkov object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkov& SemiMarkov::operator=(const SemiMarkov &smarkov) + +{ + if ((&smarkov != this) && (nb_iterator == 0)) { + remove(); + SemiMarkovChain::remove(); + Chain::remove(); + + Chain::copy(smarkov); + SemiMarkovChain::copy(smarkov); + copy(smarkov); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a distribution. + * + * \param[in] error reference on a StatError object, + * \param[in] dist_type distribution type, + * \param[in] variable variable index, + * \param[in] value state or observation. + * + * \return DiscreteParametricModel object. 
+ */ +/*--------------------------------------------------------------*/ + +DiscreteParametricModel* SemiMarkov::extract(StatError &error , process_distribution dist_type , + int variable , int value) const + +{ + bool status = true; + int hvariable; + Distribution *pdist; + DiscreteParametric *pparam; + DiscreteParametricModel *dist; + FrequencyDistribution *phisto; + CategoricalSequenceProcess *process; + + + dist = NULL; + error.init(); + + pdist = NULL; + pparam = NULL; + + if (dist_type == OBSERVATION) { + if ((variable < 1) || (variable > nb_output_process)) { + status = false; + error.update(STAT_error[STATR_OUTPUT_PROCESS_INDEX]); + } + + else { + if ((value < 0) || (value >= nb_state)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << value << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[variable - 1]) { + pdist = categorical_process[variable - 1]->observation[value]; + } + else if (discrete_parametric_process[variable - 1]) { + pparam = discrete_parametric_process[variable - 1]->observation[value]; + } + else { + status = false; + ostringstream correction_message; + correction_message << STAT_label[STATL_CATEGORICAL] << " or " + << STAT_label[STATL_DISCRETE_PARAMETRIC]; + error.correction_update(STAT_error[STATR_OUTPUT_PROCESS_TYPE] , (correction_message.str()).c_str()); + } + } + } + } + + else { + if ((variable < 0) || (variable > nb_output_process)) { + status = false; + error.update(STAT_error[STATR_OUTPUT_PROCESS_INDEX]); + } + + else { + if (variable == 0) { + process = state_process; + } + + else { + process = categorical_process[variable - 1]; + + if (!process) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable << ": " + << SEQ_error[SEQR_CHARACTERISTICS_NOT_COMPUTED]; + error.update((error_message.str()).c_str()); + } + } + + if ((process) && 
((value < 0) || (value >= process->nb_value))) { + status = false; + ostringstream error_message; + error_message << STAT_label[variable == 0 ? STATL_STATE : STATL_OUTPUT] << " " + << value << " " << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + if (status) { + switch (dist_type) { + case FIRST_OCCURRENCE : + pdist = process->first_occurrence[value]; + break; + case RECURRENCE_TIME : + pdist = process->recurrence_time[value]; + break; + case SOJOURN_TIME : + pparam = process->sojourn_time[value]; + break; + case NB_RUN : + pdist = process->nb_run[value]; + break; + case NB_OCCURRENCE : + pdist = process->nb_occurrence[value]; + break; + } + + if ((!pdist) && (!pparam)) { + status = false; + error.update(SEQ_error[SEQR_NON_EXISTING_CHARACTERISTIC_DISTRIBUTION]); + } + } + } + } + + if (status) { + phisto = NULL; + + if (semi_markov_data) { + switch (semi_markov_data->type[0]) { + case STATE : + hvariable = variable; + break; + case INT_VALUE : + hvariable = variable - 1; + break; + } + + if (hvariable >= 0) { + switch (dist_type) { + + case OBSERVATION : { + if ((semi_markov_data->observation_distribution) && + (semi_markov_data->observation_distribution[hvariable])) { + phisto = semi_markov_data->observation_distribution[hvariable][value]; + } + break; + } + + case FIRST_OCCURRENCE : { + phisto = semi_markov_data->characteristics[hvariable]->first_occurrence[value]; + break; + } + + case RECURRENCE_TIME : { + if (semi_markov_data->characteristics[hvariable]->recurrence_time[value]->nb_element > 0) { + phisto = semi_markov_data->characteristics[hvariable]->recurrence_time[value]; + } + break; + } + + case SOJOURN_TIME : { + if (semi_markov_data->characteristics[hvariable]->sojourn_time[value]->nb_element > 0) { + phisto = semi_markov_data->characteristics[hvariable]->sojourn_time[value]; + } + break; + } + + case NB_RUN : { + phisto = semi_markov_data->characteristics[hvariable]->nb_run[value]; + break; + } + + case 
NB_OCCURRENCE : { + phisto = semi_markov_data->characteristics[hvariable]->nb_occurrence[value]; + break; + } + } + } + } + + if (pdist) { + dist = new DiscreteParametricModel(*pdist , phisto); + } + else if (pparam) { + dist = new DiscreteParametricModel(*pparam , phisto); + } + } + + return dist; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a forward recurrence time distribution. + * + * \param[in] error reference on a StatError object, + * \param[in] state state, + * \param[in] histo_type type of associated frequency distribution. + * + * \return DiscreteParametricModel object. + */ +/*--------------------------------------------------------------*/ + +DiscreteParametricModel* SemiMarkov::extract(StatError &error , int state , + process_distribution histo_type) const + +{ + bool status = true; + Distribution *pdist; + DiscreteParametricModel *dist; + FrequencyDistribution *phisto; + + + dist = NULL; + error.init(); + + pdist = NULL; + + if ((state < 0) || (state >= nb_state)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << state << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + pdist = forward[state]; + + if (!pdist) { + status = false; + error.update(SEQ_error[SEQR_NON_EXISTING_FORWARD_DISTRIBUTION]); + } + + else { + phisto = NULL; + + if ((semi_markov_data) && (semi_markov_data->type[0] == STATE)) { + switch (histo_type) { + + case INITIAL_RUN : { + if ((semi_markov_data->characteristics[0]->initial_run) && + (semi_markov_data->characteristics[0]->initial_run[state]->nb_element > 0)) { + phisto = semi_markov_data->characteristics[0]->initial_run[state]; + } + break; + } + + case FINAL_RUN : { + if (semi_markov_data->characteristics[0]->final_run[state]->nb_element > 0) { + phisto = semi_markov_data->characteristics[0]->final_run[state]; + } + break; + } + } + } + + dist = new 
DiscreteParametricModel(*pdist , phisto); + } + } + + return dist; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the SemiMarkovData object included in a SemiMarkov object. + * + * \param[in] error reference on a StatError object. + * + * \return SemiMarkovData object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* SemiMarkov::extract_data(StatError &error) const + +{ + bool status = true; + SemiMarkovData *seq; + + + seq = NULL; + error.init(); + + if (!semi_markov_data) { + status = false; + error.update(STAT_error[STATR_NO_DATA]); + } + else if (nb_output_process + 1 != semi_markov_data->nb_variable) { + status = false; + error.update(SEQ_error[SEQR_STATE_SEQUENCES]); + } + + if (status) { + seq = new SemiMarkovData(*semi_markov_data); + seq->semi_markov = new SemiMarkov(*this , false); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Application of a threshold on the probability parameters of a semi-Markov chain. + * + * \param[in] min_probability minimum probability. + * + * \return SemiMarkov object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkov* SemiMarkov::thresholding(double min_probability) const + +{ + int i; + SemiMarkov *smarkov; + + + smarkov = new SemiMarkov(*this , false , 0); + smarkov->Chain::thresholding(min_probability , true); + + for (i = 0;i < smarkov->nb_output_process;i++) { + if (smarkov->categorical_process[i]) { + smarkov->categorical_process[i]->thresholding(min_probability); + } + } + + return smarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a SemiMarkov object from a file. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] cumul_threshold threshold on the state occupancy cumulative distribution functions. + * + * \return SemiMarkov object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkov* SemiMarkov::ascii_read(StatError &error , const string path , int length , + bool counting_flag , double cumul_threshold) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + process_type type = DEFAULT_TYPE; + bool status; + int i; + int line; + const Chain *chain; + const CategoricalSequenceProcess *occupancy; + const CategoricalProcess *observation; + SemiMarkov *smarkov; + ifstream in_file(path.c_str()); + + + smarkov = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test (EQUILIBRIUM_)SEMI-MARKOV_CHAIN keyword + + if (i == 0) { + if (*token == SEQ_word[SEQW_SEMI_MARKOV_CHAIN]) { + type = ORDINARY; + } + else if (*token == SEQ_word[SEQW_EQUILIBRIUM_SEMI_MARKOV_CHAIN]) { + type = EQUILIBRIUM; + } + else { + status = false; + ostringstream correction_message; + correction_message << SEQ_word[SEQW_SEMI_MARKOV_CHAIN] << " or " + << 
SEQ_word[SEQW_EQUILIBRIUM_SEMI_MARKOV_CHAIN]; + error.correction_update(STAT_parsing[STATP_KEYWORD] , + (correction_message.str()).c_str() , line); + } + } + + i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (type != DEFAULT_TYPE) { + + // analysis of the format and reading of the Markov chain + + chain = Chain::parsing(error , in_file , line , type); + + if (chain) { + + // analysis of the format and reading of the state occupancy distributions + + occupancy = CategoricalSequenceProcess::occupancy_parsing(error , in_file , line , + *chain , cumul_threshold); + if (!occupancy) { + status = false; + } + + // analysis of the format and reading of the categorical observation distributions + + observation = NULL; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test OUTPUT_PROCESS keyword + + if (i == 0) { + if (*token != STAT_word[STATW_OUTPUT_PROCESS]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_OUTPUT_PROCESS] , line); + } + } + + i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + observation = CategoricalProcess::parsing(error , in_file , line , chain->nb_state , + HIDDEN_MARKOV , false); + if (!observation) { + status = false; + } + + break; + } + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + if (!(trim_right_copy_if(buffer , is_any_of(" \t")).empty())) { + status = false; + 
error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + + if (status) { + smarkov = new SemiMarkov(chain , occupancy , observation , length , counting_flag); + } + + delete chain; + delete occupancy; + delete observation; + } + } + } + + return smarkov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a SemiMarkov object. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& SemiMarkov::line_write(ostream &os) const + +{ + os << nb_state << " " << STAT_word[STATW_STATES]; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a SemiMarkov object and the associated data structure. + * + * \param[in,out] os stream, + * \param[in] seq pointer on a SemiMarkovData object, + * \param[in] exhaustive flag detail level, + * \param[in] file_flag flag file, + * \param[in] hidden flag hidden model. + */ +/*--------------------------------------------------------------*/ + +ostream& SemiMarkov::ascii_write(ostream &os , const SemiMarkovData *seq , + bool exhaustive , bool file_flag , bool hidden) const + +{ + int i , j , k; + int buff , width , variable; + double **distance; + FrequencyDistribution *marginal_dist = NULL , **observation_dist = NULL; + Histogram *marginal_histo = NULL , **observation_histo = NULL; + SequenceCharacteristics *characteristics = NULL; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::left , ios::adjustfield); + + if (hidden) { + switch (type) { + case ORDINARY : + os << SEQ_word[SEQW_HIDDEN_SEMI_MARKOV_CHAIN] << endl; + break; + case EQUILIBRIUM : + os << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_SEMI_MARKOV_CHAIN] << endl; + break; + } + } + + else { + switch (type) { + case ORDINARY : + os << SEQ_word[SEQW_SEMI_MARKOV_CHAIN] << endl; + break; + case EQUILIBRIUM : + os << SEQ_word[SEQW_EQUILIBRIUM_SEMI_MARKOV_CHAIN] << endl; + break; + } + } + 
+ // writing of the Markov chain parameters + + ascii_print(os , file_flag); + + // writing of the state occupancy distributions + + if ((seq) && (seq->type[0] == STATE)) { + characteristics = seq->characteristics[0]; + } + else { + characteristics = NULL; + } + + state_process->ascii_print(os , 0 , NULL , NULL , characteristics , + exhaustive , file_flag , forward); + + if (hidden) { + for (i = 0;i < nb_output_process;i++) { + if (discrete_parametric_process[i]) { + if (discrete_parametric_process[i]->weight) { + width = column_width(nb_state , discrete_parametric_process[i]->weight->mass); + } + else { + width = 0; + } + if (discrete_parametric_process[i]->restoration_weight) { + buff = column_width(nb_state , discrete_parametric_process[i]->restoration_weight->mass); + if (buff > width) { + width = buff; + } + } + width++; + + if (discrete_parametric_process[i]->weight) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_THEORETICAL] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": "; + + for (j = 0;j < nb_state;j++) { + os << setw(width) << discrete_parametric_process[i]->weight->mass[j]; + } + os << endl; + } + + if (discrete_parametric_process[i]->restoration_weight) { + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_RESTORATION] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": "; + + for (j = 0;j < nb_state;j++) { + os << setw(width) << discrete_parametric_process[i]->restoration_weight->mass[j]; + } + os << endl; + } + + break; + } + + else if (continuous_parametric_process[i]) { + if (continuous_parametric_process[i]->weight) { + width = column_width(nb_state , continuous_parametric_process[i]->weight->mass); + } + else { + width = 0; + } + if (continuous_parametric_process[i]->restoration_weight) { + buff = column_width(nb_state , continuous_parametric_process[i]->restoration_weight->mass); + if (buff > width) { + width = buff; + } + } + width++; + + if (continuous_parametric_process[i]->weight) { + os << "\n"; + 
if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_THEORETICAL] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": "; + + for (j = 0;j < nb_state;j++) { + os << setw(width) << continuous_parametric_process[i]->weight->mass[j]; + } + os << endl; + } + + if (continuous_parametric_process[i]->restoration_weight) { + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_RESTORATION] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": "; + + for (j = 0;j < nb_state;j++) { + os << setw(width) << continuous_parametric_process[i]->restoration_weight->mass[j]; + } + os << endl; + } + + break; + } + } + + os << "\n" << nb_output_process << " " + << STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] << endl; + } + + // writing of the distributions associated with each observation process + + if (hidden) { + distance = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + distance[i] = new double[nb_state]; + } + } + + for (i = 0;i < nb_output_process;i++) { + os << "\n" << STAT_word[STATW_OUTPUT_PROCESS]; + + if (hidden) { + os << " " << i + 1; + + if (categorical_process[i]) { + os << " : " << STAT_word[STATW_CATEGORICAL]; + } + else if (discrete_parametric_process[i]) { + os << " : " << STAT_word[STATW_DISCRETE_PARAMETRIC]; + } + else { + os << " : " << STAT_word[STATW_CONTINUOUS_PARAMETRIC]; + } + } + os << endl; + + if ((continuous_parametric_process[i]) && ((continuous_parametric_process[i]->ident == LINEAR_MODEL) || + (continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL))) { + for (j = 0;j < nb_state;j++) { + os << "\n" << STAT_word[STATW_STATE] << " " << j << " " + << STAT_word[STATW_OBSERVATION_MODEL] << endl; + continuous_parametric_process[i]->observation[j]->ascii_parameter_print(os , file_flag); + } + } + + if (seq) { + switch (seq->type[0]) { + case STATE : + variable = i + 1; + break; + default : + variable = i; + break; + } + + if ((continuous_parametric_process[i]) && 
(continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)) { + if (seq->type[0] == STATE) { + seq->autoregressive_model_ascii_print(os , variable , continuous_parametric_process[i] , file_flag); + } + } + + else if ((categorical_process[i]) || (discrete_parametric_process[i]) || + ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident != LINEAR_MODEL))) { + if (seq->observation_distribution) { + observation_dist = seq->observation_distribution[variable]; + } + marginal_dist = seq->marginal_distribution[variable]; + + if (seq->observation_histogram) { + observation_histo = seq->observation_histogram[variable]; + } + marginal_histo = seq->marginal_histogram[variable]; + + characteristics = seq->characteristics[variable]; + } + } + + if (categorical_process[i]) { + categorical_process[i]->ascii_print(os , i + 1 , observation_dist , marginal_dist , + characteristics , exhaustive , file_flag); + + if (hidden) { + for (j = 0;j < nb_state;j++) { + distance[j][j] = 0.; + + for (k = j + 1;k < nb_state;k++) { + if ((transition[j][k] > MIN_PROBABILITY) || (transition[k][j] > MIN_PROBABILITY)) { + distance[j][k] = categorical_process[i]->observation[j]->overlap_distance_computation(*(categorical_process[i]->observation[k])); + } + else { + distance[j][k] = 1.; + } + + distance[k][j] = distance[j][k]; + } + } + } + } + + else if (discrete_parametric_process[i]) { + discrete_parametric_process[i]->ascii_print(os , observation_dist , marginal_dist , + exhaustive , file_flag); + + if (hidden) { + for (j = 0;j < nb_state;j++) { + distance[j][j] = 0.; + + for (k = j + 1;k < nb_state;k++) { + if ((transition[j][k] > MIN_PROBABILITY) || (transition[k][j] > MIN_PROBABILITY)) { + distance[j][k] = discrete_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(discrete_parametric_process[i]->observation[k])); + } + else { + distance[j][k] = 1.; + } + + distance[k][j] = distance[j][k]; + } + } + } + } + + else if 
((continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) { + continuous_parametric_process[i]->ascii_print(os , observation_histo , observation_dist , + marginal_histo , marginal_dist , + exhaustive , file_flag); + + if (hidden) { + for (j = 0;j < nb_state;j++) { + distance[j][j] = 0.; + + for (k = j + 1;k < nb_state;k++) { + if ((transition[j][k] > MIN_PROBABILITY) || (transition[k][j] > MIN_PROBABILITY)) { + distance[j][k] = continuous_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(continuous_parametric_process[i]->observation[k])); + } + else { + distance[j][k] = 1.; + } + + distance[k][j] = distance[j][k]; + } + } + } + } + + if ((hidden) && ((categorical_process[i]) || (discrete_parametric_process[i]) || + ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident != LINEAR_MODEL) && + (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)))) { + width = column_width(nb_state , distance[0]); + for (j = 1;j < nb_state;j++) { + buff = column_width(nb_state , distance[j]); + if (buff > width) { + width = buff; + } + } + width += ASCII_SPACE; + + os.setf(ios::left , ios::adjustfield); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_CONSECUTIVE_STATE_OBSERVATION_DISTRIBUTION_DISTANCE] << endl; + + for (j = 0;j < nb_state;j++) { + if (file_flag) { + os << "# "; + } + for (k = 0;k < nb_state;k++) { + if ((k != j) && (transition[j][k] > MIN_PROBABILITY)) { + os << setw(width) << distance[j][k]; + } + else { + os << setw(width) << "_"; + } + } + os << endl; + } + } + } + + if (hidden) { + for (i = 0;i < nb_state;i++) { + delete [] distance[i]; + } + delete [] distance; + } + + if (seq) { + int nb_parameter = nb_parameter_computation(hidden ? 
MIN_PROBABILITY : 0.); + double information; + + + // writing of the sequence length frequency distribution + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + seq->length_distribution->ascii_characteristic_print(os , false , file_flag); + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + seq->length_distribution->ascii_print(os , file_flag); + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_CUMUL_LENGTH] << ": " << seq->cumul_length << endl; + + // writing of the information quantity of the observed sequences in the i.i.d. case + + for (i = 0;i < seq->nb_variable;i++) { + if (seq->type[i] == REAL_VALUE) { + break; + } + } + + if (i == seq->nb_variable) { + information = seq->iid_information_computation(); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_IID_INFORMATION] << ": " << information << " (" + << information / seq->cumul_length << ")" << endl; + } + + // writing of the (penalized) log-likelihoods of the model for sequences + + if (hidden) { + if (seq->restoration_likelihood != D_INF) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << ": " << seq->restoration_likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << seq->restoration_likelihood / seq->cumul_length << ")" << endl; + } + + if (seq->sample_entropy != D_DEFAULT) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << seq->sample_entropy << " (" + << STAT_label[STATL_NORMALIZED] << ": " << seq->sample_entropy / seq->cumul_length << ")" << endl; + } + + if (seq->likelihood != D_INF) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": 
" << seq->likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << seq->likelihood / seq->cumul_length << ")" << endl; + } + } + + else { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_LIKELIHOOD] << ": " << seq->likelihood << " (" + << STAT_label[STATL_NORMALIZED] << ": " << seq->likelihood / seq->cumul_length << ")" << endl; + } + + if (seq->likelihood != D_INF) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << "): " + << 2 * (seq->likelihood - nb_parameter) << endl; + + if (nb_parameter < seq->cumul_length - 1) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << "): " + << 2 * (seq->likelihood - (double)(nb_parameter * seq->cumul_length) / + (double)(seq->cumul_length - nb_parameter - 1)) << endl; + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << "): " + << 2 * seq->likelihood - nb_parameter * log((double)seq->cumul_length) << endl; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BICc] << "): " + << 2 * seq->likelihood - penalty_computation(hidden , (hidden ? 
MIN_PROBABILITY : 0.)) << endl; + } + +// if ((hidden) && (seq->restoration_likelihood != D_INF)) { + if ((hidden) && (seq->likelihood != D_INF)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICL] << "): " +// << 2 * seq->restoration_likelihood - nb_parameter * log((double)seq->cumul_length) << endl; + << 2 * (seq->likelihood - seq->sample_entropy) - nb_parameter * log((double)seq->cumul_length) << endl; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICLc] << "): " +// << 2 * seq->restoration_likelihood - penalty_computation(hidden , MIN_PROBABILITY) << endl; + << 2 * (seq->likelihood - seq->sample_entropy) - penalty_computation(hidden , MIN_PROBABILITY) << endl; + } + } + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a SemiMarkov object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& SemiMarkov::ascii_write(ostream &os , bool exhaustive) const + +{ + return ascii_write(os , semi_markov_data , exhaustive , false); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a SemiMarkov object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. 
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkov::ascii_write(StatError &error , const string path ,
+                             bool exhaustive) const
+
+{
+  bool status;
+  ofstream out_file(path.c_str());
+
+
+  // reset the error object before reporting anything for this call
+  error.init();
+
+  // the stream tests false when the file could not be opened
+  if (!out_file) {
+    status = false;
+    error.update(STAT_error[STATR_FILE_NAME]);
+  }
+
+  else {
+    status = true;
+    // last argument 'true': presumably the file flag of the general
+    // ascii_write overload (comment-prefixed lines) - TODO confirm
+    ascii_write(out_file , semi_markov_data , exhaustive , true);
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a SemiMarkov object and the associated data structure
+ *         in a file at the spreadsheet format.
+ *
+ *  Output order: model type keyword, Markov chain parameters, state process,
+ *  one section per observation process (with, for hidden models, the matrix
+ *  of distances between the observation distributions of consecutive states),
+ *  then, when seq is not NULL, the sequence length distribution and the
+ *  (penalized) log-likelihoods.
+ *
+ *  \param[in,out] os     stream,
+ *  \param[in]     seq    pointer on a SemiMarkovData object (may be NULL),
+ *  \param[in]     hidden flag hidden model.
+ *
+ *  \return               the stream passed as argument.
+ */
+/*--------------------------------------------------------------*/
+
+ostream& SemiMarkov::spreadsheet_write(ostream &os , const SemiMarkovData *seq ,
+                                       bool hidden) const
+
+{
+  int i , j , k;
+  int variable;
+  double **distance;   // nb_state x nb_state matrix, allocated only for hidden models
+  FrequencyDistribution *marginal_dist = NULL , **observation_dist = NULL;
+  Histogram *marginal_histo = NULL , **observation_histo = NULL;
+  SequenceCharacteristics *characteristics = NULL;
+
+
+  // header keyword: depends on the hidden flag and on the model type
+
+  if (hidden) {
+    switch (type) {
+    case ORDINARY :
+      os << SEQ_word[SEQW_HIDDEN_SEMI_MARKOV_CHAIN] << endl;
+      break;
+    case EQUILIBRIUM :
+      os << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_SEMI_MARKOV_CHAIN] << endl;
+      break;
+    }
+  }
+
+  else {
+    switch (type) {
+    case ORDINARY :
+      os << SEQ_word[SEQW_SEMI_MARKOV_CHAIN] << endl;
+      break;
+    case EQUILIBRIUM :
+      os << SEQ_word[SEQW_EQUILIBRIUM_SEMI_MARKOV_CHAIN] << endl;
+      break;
+    }
+  }
+
+  // writing of the Markov chain parameters
+
+  spreadsheet_print(os);
+
+  // writing of the state occupancy distributions
+
+  // empirical state characteristics are only available when the first
+  // variable of the data is of type STATE
+  if ((seq) && (seq->type[0] == STATE)) {
+    characteristics = seq->characteristics[0];
+  }
+  else {
+    characteristics = NULL;
+  }
+
+  state_process->spreadsheet_print(os , 0 , NULL , NULL , characteristics , forward);
+
+  // writing of the distributions associated with each observation process
+
+  if (hidden) {
+    os << "\n" << nb_output_process << "\t"
+       << STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] << endl;
+  }
+
+  if (hidden) {
+    distance = new double*[nb_state];
+    for (i = 0;i < nb_state;i++) {
+      distance[i] = new double[nb_state];
+    }
+  }
+
+  for (i = 0;i < nb_output_process;i++) {
+    os << "\n" << STAT_word[STATW_OUTPUT_PROCESS];
+
+    if (hidden) {
+      os << "\t" << i + 1;
+
+      if (categorical_process[i]) {
+        os << "\t" << STAT_word[STATW_CATEGORICAL];
+      }
+      else if (discrete_parametric_process[i]) {
+        os << "\t" << STAT_word[STATW_DISCRETE_PARAMETRIC];
+      }
+      else {
+        os << "\t" << STAT_word[STATW_CONTINUOUS_PARAMETRIC];
+      }
+    }
+    os << endl;
+
+    // linear and autoregressive models: one observation model per state
+    if ((continuous_parametric_process[i]) && ((continuous_parametric_process[i]->ident == LINEAR_MODEL) ||
+         (continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL))) {
+      for (j = 0;j < nb_state;j++) {
+        os << "\n" << STAT_word[STATW_STATE] << " " << j << "\t"
+           << STAT_word[STATW_OBSERVATION_MODEL] << endl;
+        continuous_parametric_process[i]->observation[j]->spreadsheet_parameter_print(os);
+      }
+    }
+
+    if (seq) {
+      // index of the data variable matching output process i: shifted by one
+      // when the first data variable is the state variable
+      switch (seq->type[0]) {
+      case STATE :
+        variable = i + 1;
+        break;
+      default :
+        variable = i;
+        break;
+      }
+
+      if ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident == LINEAR_MODEL)) {
+        seq->linear_model_spreadsheet_print(os , variable , continuous_parametric_process[i]);
+      }
+      else if ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)) {
+        if (seq->type[0] == STATE) {
+          seq->autoregressive_model_spreadsheet_print(os , variable , continuous_parametric_process[i]);
+        }
+      }
+
+      else {
+        // empirical distributions/histograms used by the spreadsheet_print calls below;
+        // NOTE(review): observation_dist / observation_histo keep the value of a
+        // previous iteration when the corresponding seq array is NULL
+        if (seq->observation_distribution) {
+          observation_dist = seq->observation_distribution[variable];
+        }
+        marginal_dist = seq->marginal_distribution[variable];
+
+        if (seq->observation_histogram) {
+          observation_histo = seq->observation_histogram[variable];
+        }
+        marginal_histo = seq->marginal_histogram[variable];
+
+        characteristics = seq->characteristics[variable];
+      }
+    }
+
+    if (categorical_process[i]) {
+      categorical_process[i]->spreadsheet_print(os , i + 1 , observation_dist , marginal_dist ,
+                                                characteristics);
+
+      if (hidden) {
+        // distances are computed (symmetrically) only for pairs of states
+        // linked by a transition in at least one direction
+        for (j = 0;j < nb_state;j++) {
+          for (k = j + 1;k < nb_state;k++) {
+            if ((transition[j][k] > MIN_PROBABILITY) || (transition[k][j] > MIN_PROBABILITY)) {
+              distance[j][k] = categorical_process[i]->observation[j]->overlap_distance_computation(*(categorical_process[i]->observation[k]));
+              distance[k][j] = distance[j][k];
+            }
+          }
+        }
+      }
+    }
+
+    else if (discrete_parametric_process[i]) {
+      discrete_parametric_process[i]->spreadsheet_print(os , observation_dist , marginal_dist);
+
+      if (hidden) {
+        for (j = 0;j < nb_state;j++) {
+          for (k = j + 1;k < nb_state;k++) {
+            if ((transition[j][k] > MIN_PROBABILITY) || (transition[k][j] > MIN_PROBABILITY)) {
+              distance[j][k] = discrete_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(discrete_parametric_process[i]->observation[k]));
+              distance[k][j] = distance[j][k];
+            }
+          }
+        }
+      }
+    }
+
+    // reached only when neither the categorical nor the discrete parametric
+    // process exists; continuous_parametric_process[i] is dereferenced
+    // without a NULL test here
+    else if ((continuous_parametric_process[i]->ident != LINEAR_MODEL) &&
+             (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) {
+      continuous_parametric_process[i]->spreadsheet_print(os , observation_histo , observation_dist ,
+                                                          marginal_histo , marginal_dist);
+
+      if (hidden) {
+        for (j = 0;j < nb_state;j++) {
+          for (k = j + 1;k < nb_state;k++) {
+            if ((transition[j][k] > MIN_PROBABILITY) || (transition[k][j] > MIN_PROBABILITY)) {
+              distance[j][k] = continuous_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(continuous_parametric_process[i]->observation[k]));
+              distance[k][j] = distance[j][k];
+            }
+          }
+        }
+      }
+    }
+
+    // writing of the distance matrix: a cell is filled only when the
+    // transition j -> k is possible, the others are left empty
+    if ((hidden) && ((categorical_process[i]) || (discrete_parametric_process[i]) ||
+         ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident != LINEAR_MODEL) &&
+          (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)))) {
+      os << "\n" << STAT_label[STATL_CONSECUTIVE_STATE_OBSERVATION_DISTRIBUTION_DISTANCE] << endl;
+
+      for (j = 0;j < nb_state;j++) {
+        for (k = 0;k < nb_state;k++) {
+          if ((k != j) && (transition[j][k] > MIN_PROBABILITY)) {
+            os << distance[j][k];
+          }
+          os << "\t";
+        }
+        os << endl;
+      }
+    }
+  }
+
+  if (hidden) {
+    for (i = 0;i < nb_state;i++) {
+      delete [] distance[i];
+    }
+    delete [] distance;
+  }
+
+  if (seq) {
+    int nb_parameter = nb_parameter_computation(hidden ? MIN_PROBABILITY : 0.);
+    double information;
+
+
+    // writing of the sequence length frequency distribution
+
+    os << "\n" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t";
+    seq->length_distribution->spreadsheet_characteristic_print(os);
+
+    os << "\n\t" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
+    seq->length_distribution->spreadsheet_print(os);
+
+    os << "\n" << SEQ_label[SEQL_CUMUL_LENGTH] << "\t" << seq->cumul_length << endl;
+
+    // writing of the information quantity of the observed sequences in the i.i.d. case
+
+    // skipped as soon as one variable is of type REAL_VALUE
+    for (i = 0;i < seq->nb_variable;i++) {
+      if (seq->type[i] == REAL_VALUE) {
+        break;
+      }
+    }
+
+    if (i == seq->nb_variable) {
+      information = seq->iid_information_computation();
+
+      os << "\n" << SEQ_label[SEQL_IID_INFORMATION] << "\t" << information << "\t"
+         << information / seq->cumul_length << endl;
+    }
+
+    // writing of the (penalized) log-likelihoods of the model for sequences
+
+    if (hidden) {
+      if (seq->restoration_likelihood != D_INF) {
+        os << "\n" << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << "\t" << seq->restoration_likelihood << "\t"
+           << STAT_label[STATL_NORMALIZED] << "\t" << seq->restoration_likelihood / seq->cumul_length << endl;
+      }
+
+      if (seq->sample_entropy != D_DEFAULT) {
+        os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << "\t" << seq->sample_entropy << "\t"
+           << STAT_label[STATL_NORMALIZED] << "\t" << seq->sample_entropy / seq->cumul_length << endl;
+      }
+
+      if (seq->likelihood != D_INF) {
+        os << "\n" << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << "\t" << seq->likelihood << "\t"
+           << STAT_label[STATL_NORMALIZED] << "\t" << seq->likelihood / seq->cumul_length << endl;
+      }
+    }
+
+    else {
+      os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << seq->likelihood << "\t"
+         << STAT_label[STATL_NORMALIZED] << "\t" << seq->likelihood / seq->cumul_length << endl;
+    }
+
+    if (seq->likelihood != D_INF) {
+      os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t"
+         << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << ")\t"
+         << 2 * (seq->likelihood - nb_parameter) << endl;
+
+      // AICc is written only when its denominator
+      // (cumul_length - nb_parameter - 1) is strictly positive
+      if (nb_parameter < seq->cumul_length - 1) {
+        os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t"
+           << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << ")\t"
+           << 2 * (seq->likelihood - (double)(nb_parameter * seq->cumul_length) /
+              (double)(seq->cumul_length - nb_parameter - 1)) << endl;
+      }
+
+      os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t"
+         << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << ")\t"
+         << 2 * seq->likelihood - nb_parameter * log((double)seq->cumul_length) << endl;
+
+      os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t"
+         << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BICc] << ")\t"
+         << 2 * seq->likelihood - penalty_computation(hidden , (hidden ? MIN_PROBABILITY : 0.)) << endl;
+    }
+
+    // ICL / ICLc: computed from (likelihood - sample_entropy); the former
+    // restoration_likelihood-based versions are kept below as commented-out code
+//    if ((hidden) && (seq->restoration_likelihood != D_INF)) {
+    if ((hidden) && (seq->likelihood != D_INF)) {
+      os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t"
+         << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICL] << ")\t"
+//         << 2 * seq->restoration_likelihood - nb_parameter * log((double)seq->cumul_length) << endl;
+         << 2 * (seq->likelihood - seq->sample_entropy) - nb_parameter * log((double)seq->cumul_length) << endl;
+
+      os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t"
+         << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICLc] << ")\t"
+//         << 2 * seq->restoration_likelihood - penalty_computation(hidden , MIN_PROBABILITY) << endl;
+         << 2 * (seq->likelihood - seq->sample_entropy) - penalty_computation(hidden , MIN_PROBABILITY) << endl;
+    }
+  }
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a SemiMarkov object in a file at the spreadsheet format.
+ *
+ *  \param[in] error reference on a StatError object,
+ *  \param[in] path  file path.
+ *
+ *  \return          error status (false when the file cannot be opened).
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkov::spreadsheet_write(StatError &error , const string path) const
+
+{
+  bool status;
+  ofstream out_file(path.c_str());
+
+
+  error.init();
+
+  // the stream tests false when the file could not be opened
+  if (!out_file) {
+    status = false;
+    error.update(STAT_error[STATR_FILE_NAME]);
+  }
+
+  else {
+    status = true;
+    // the hidden flag is not passed: presumably defaulted in the class
+    // declaration - TODO confirm
+    spreadsheet_write(out_file , semi_markov_data);
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a SemiMarkov object and the associated data structure using Gnuplot.
+ *
+ *  \param[in] prefix file prefix,
+ *  \param[in] title  figure title,
+ *  \param[in] seq    pointer on a SemiMarkovData object.
+ *
+ *  \return           error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkov::plot_write(const char *prefix , const char *title ,
+                            const SemiMarkovData *seq) const
+
+{
+  bool status;
+  int i;
+  int variable , nb_value = I_DEFAULT;
+  double *empirical_cdf[2];   // filled by cumulative_distribution_function_computation(), freed below
+  FrequencyDistribution *length_distribution = NULL , *marginal_dist = NULL , **observation_dist = NULL;
+  Histogram *marginal_histo = NULL , **observation_histo = NULL;
+  SequenceCharacteristics *characteristics = NULL;
+
+
+  // empirical state characteristics are only available when the first
+  // variable of the data is of type STATE
+  if ((seq) && (seq->type[0] == STATE)) {
+    characteristics = seq->characteristics[0];
+    length_distribution = seq->length_distribution;
+  }
+  else {
+    characteristics = NULL;
+  }
+
+  status = state_process->plot_print(prefix , title , 0 , NULL , NULL ,
+                                     characteristics , length_distribution , forward);
+
+  // the observation processes are plotted only if the state process plot
+  // succeeded; the return values of the plot_print calls below are not checked
+  if (status) {
+    if (seq) {
+      length_distribution = seq->length_distribution;
+    }
+
+    for (i = 0;i < nb_output_process;i++) {
+      if (seq) {
+        // index of the data variable matching output process i: shifted by one
+        // when the first data variable is the state variable
+        switch (seq->type[0]) {
+        case STATE :
+          variable = i + 1;
+          break;
+        default :
+          variable = i;
+          break;
+        }
+
+        if ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident == LINEAR_MODEL)) {
+          seq->linear_model_plot_print(prefix , title , variable , continuous_parametric_process[i]);
+        }
+        else if ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)) {
+          if (seq->type[0] == STATE) {
+            seq->autoregressive_model_plot_print(prefix , title , variable , continuous_parametric_process[i]);
+          }
+        }
+
+        else {
+          if (seq->observation_distribution) {
+            observation_dist = seq->observation_distribution[variable];
+          }
+          marginal_dist = seq->marginal_distribution[variable];
+
+          if (seq->observation_histogram) {
+            observation_histo = seq->observation_histogram[variable];
+          }
+          marginal_histo = seq->marginal_histogram[variable];
+
+          characteristics = seq->characteristics[variable];
+
+          // empirical cumulative distribution function, used only by the
+          // continuous parametric plot below
+          if (continuous_parametric_process[i]) {
+            nb_value = seq->cumulative_distribution_function_computation(variable , empirical_cdf);
+          }
+        }
+      }
+
+      if (categorical_process[i]) {
+        categorical_process[i]->plot_print(prefix , title , i + 1 , observation_dist ,
+                                           marginal_dist , characteristics ,
+                                           length_distribution);
+      }
+      else if (discrete_parametric_process[i]) {
+        discrete_parametric_process[i]->plot_print(prefix , title , i + 1 , observation_dist ,
+                                                   marginal_dist);
+      }
+      // reached only when neither the categorical nor the discrete parametric
+      // process exists; continuous_parametric_process[i] is dereferenced
+      // without a NULL test here
+      else if ((continuous_parametric_process[i]->ident != LINEAR_MODEL) &&
+               (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) {
+        continuous_parametric_process[i]->plot_print(prefix , title , i + 1 ,
+                                                     observation_histo , observation_dist ,
+                                                     marginal_histo , marginal_dist ,
+                                                     nb_value , (seq ? empirical_cdf : NULL));
+        // release the two arrays filled for this process in the seq branch above
+        if (seq) {
+          delete [] empirical_cdf[0];
+          delete [] empirical_cdf[1];
+        }
+      }
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a SemiMarkov object using Gnuplot.
+ *
+ *  Delegates to plot_write(prefix , title , semi_markov_data) and records
+ *  STATR_FILE_PREFIX in the error object on failure.
+ *
+ *  \param[in] error  reference on a StatError object,
+ *  \param[in] prefix file prefix,
+ *  \param[in] title  figure title.
+ *
+ *  \return           error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkov::plot_write(StatError &error , const char *prefix ,
+                            const char *title) const
+
+{
+  bool status = plot_write(prefix , title , semi_markov_data);
+
+  error.init();
+
+  if (!status) {
+    error.update(STAT_error[STATR_FILE_PREFIX]);
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a SemiMarkov object and the associated data structure.
+ *
+ *  \param[in] seq pointer on a SemiMarkovData object.
+ *
+ *  \return        MultiPlotSet object.
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* SemiMarkov::get_plotable(const SemiMarkovData *seq) const
+
+{
+  // Two passes: (1) count the plots so that the MultiPlotSet can be sized,
+  // (2) let each process write its plots. seq may be NULL (model only).
+
+  int i , j;
+  int nb_plot_set , index_length , index , variable;
+  FrequencyDistribution *length_distribution = NULL , *marginal_dist = NULL , **observation_dist = NULL;
+  Histogram *marginal_histo = NULL , **observation_histo = NULL;
+  SequenceCharacteristics *characteristics = NULL;
+  MultiPlotSet *plot_set;
+
+
+  // empirical state characteristics are only available when the first
+  // variable of the data is of type STATE
+  if ((seq) && (seq->type[0] == STATE)) {
+    characteristics = seq->characteristics[0];
+  }
+  else {
+    characteristics = NULL;
+  }
+
+  // computation of the number of plots
+
+  nb_plot_set = 0;
+
+  // state process
+
+  if ((state_process->index_value) || (characteristics)) {
+    nb_plot_set++;
+
+    if (characteristics) {
+      index_length = characteristics->index_value->plot_length_computation();
+
+      if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) {
+        nb_plot_set++;
+      }
+      nb_plot_set++;
+    }
+  }
+
+  if ((state_process->first_occurrence) || (characteristics)) {
+    for (i = 0;i < nb_state;i++) {
+      if ((state_process->first_occurrence) &&
+          (state_process->first_occurrence[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->first_occurrence[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+  }
+
+  if ((state_process->recurrence_time) || (characteristics)) {
+    for (i = 0;i < nb_state;i++) {
+      if ((state_process->recurrence_time) &&
+          (state_process->recurrence_time[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->recurrence_time[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+  }
+
+  if ((state_process->sojourn_time) || (characteristics)) {
+    for (i = 0;i < nb_state;i++) {
+      if ((state_process->sojourn_time) &&
+          (state_process->sojourn_time[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->sojourn_time[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+
+      if ((characteristics) && (i < characteristics->nb_value) &&
+          (characteristics->initial_run) &&
+          (characteristics->initial_run[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+
+      if ((forward) && (forward[i])) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->final_run[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+  }
+
+  if ((state_process->nb_run) || (state_process->nb_occurrence) ||
+      ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) {
+    for (i = 0;i < nb_state;i++) {
+      if (state_process->nb_run) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+
+      if (state_process->nb_occurrence) {
+        nb_plot_set++;
+      }
+      else if ((characteristics) && (i < characteristics->nb_value) &&
+               (characteristics->nb_occurrence) &&
+               (characteristics->nb_occurrence[i]->nb_element > 0)) {
+        nb_plot_set++;
+      }
+    }
+
+    if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) {
+      nb_plot_set++;
+    }
+  }
+
+  // observation processes
+
+  for (i = 0;i < nb_output_process;i++) {
+    if (seq) {
+      // index of the data variable matching output process i: shifted by one
+      // when the first data variable is the state variable
+      switch (seq->type[0]) {
+      case STATE :
+        variable = i + 1;
+        break;
+      default :
+        variable = i;
+        break;
+      }
+
+      characteristics = seq->characteristics[variable];
+    }
+
+    if (categorical_process[i]) {
+      if ((categorical_process[i]->index_value) || (characteristics)) {
+        nb_plot_set++;
+
+        if (characteristics) {
+          index_length = characteristics->index_value->plot_length_computation();
+
+          if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) {
+            nb_plot_set++;
+          }
+          nb_plot_set++;
+        }
+      }
+
+      if ((categorical_process[i]->first_occurrence) || (characteristics)) {
+        for (j = 0;j < categorical_process[i]->nb_value;j++) {
+          if ((categorical_process[i]->first_occurrence) &&
+              (categorical_process[i]->first_occurrence[j])) {
+            nb_plot_set++;
+          }
+          else if ((characteristics) && (j < characteristics->nb_value) &&
+                   (characteristics->first_occurrence[j]->nb_element > 0)) {
+            nb_plot_set++;
+          }
+        }
+      }
+
+      if ((categorical_process[i]->recurrence_time) || (characteristics)) {
+        for (j = 0;j < categorical_process[i]->nb_value;j++) {
+          if ((categorical_process[i]->recurrence_time) &&
+              (categorical_process[i]->recurrence_time[j])) {
+            nb_plot_set++;
+          }
+          // bound check on j (the index actually used), not on the process index i
+          else if ((characteristics) && (j < characteristics->nb_value) &&
+                   (characteristics->recurrence_time[j]->nb_element > 0)) {
+            nb_plot_set++;
+          }
+        }
+      }
+
+      if ((categorical_process[i]->sojourn_time) || (characteristics)) {
+        for (j = 0;j < categorical_process[i]->nb_value;j++) {
+          if ((categorical_process[i]->sojourn_time) &&
+              (categorical_process[i]->sojourn_time[j])) {
+            nb_plot_set++;
+          }
+          // bound check on j (the index actually used), not on the process index i
+          else if ((characteristics) && (j < characteristics->nb_value) &&
+                   (characteristics->sojourn_time[j]->nb_element > 0)) {
+            nb_plot_set++;
+          }
+
+/*          if ((characteristics) && (j < characteristics->nb_value) &&
+              (characteristics->initial_run) &&
+              (characteristics->initial_run[j]->nb_element > 0)) {
+            nb_plot_set++;
+          } */
+
+          if ((characteristics) && (j < characteristics->nb_value) &&
+              (characteristics->final_run[j]->nb_element > 0)) {
+            nb_plot_set++;
+          }
+        }
+      }
+
+      if ((categorical_process[i]->nb_run) || (categorical_process[i]->nb_occurrence) ||
+          ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) {
+        for (j = 0;j < categorical_process[i]->nb_value;j++) {
+          if (categorical_process[i]->nb_run) {
+            nb_plot_set++;
+          }
+          else if ((characteristics) && (j < characteristics->nb_value) &&
+                   (characteristics->nb_run) && (characteristics->nb_run[j]->nb_element > 0)) {
+            nb_plot_set++;
+          }
+
+          if (categorical_process[i]->nb_occurrence) {
+            nb_plot_set++;
+          }
+          else if ((characteristics) && (j < characteristics->nb_value) &&
+                   (characteristics->nb_occurrence) &&
+                   (characteristics->nb_occurrence[j]->nb_element > 0)) {
+            nb_plot_set++;
+          }
+        }
+
+        if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) {
+          nb_plot_set++;
+        }
+      }
+    }
+
+    if ((seq != NULL) && ((seq->observation_distribution) || (seq->observation_histogram))) {
+      nb_plot_set += nb_state;
+    }
+    else {
+      nb_plot_set++;
+    }
+
+    if ((seq != NULL) && (categorical_process[i]) && (seq->marginal_distribution[variable])) {
+      if ((categorical_process[i]->weight) &&
+          (categorical_process[i]->mixture)) {
+        nb_plot_set++;
+      }
+      if ((categorical_process[i]->restoration_weight) &&
+          (categorical_process[i]->restoration_mixture)) {
+        nb_plot_set++;
+      }
+    }
+
+    if ((seq != NULL) && (discrete_parametric_process[i]) && (seq->marginal_distribution[variable])) {
+      if ((discrete_parametric_process[i]->weight) &&
+          (discrete_parametric_process[i]->mixture)) {
+        nb_plot_set += 2;
+      }
+      if ((discrete_parametric_process[i]->restoration_weight) &&
+          (discrete_parametric_process[i]->restoration_mixture)) {
+        nb_plot_set += 2;
+      }
+    }
+
+    if ((seq != NULL) && (continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident != LINEAR_MODEL) &&
+        (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL) &&
+        ((seq->marginal_histogram[variable]) || (seq->marginal_distribution[variable]))) {
+      if (continuous_parametric_process[i]->weight) {
+        nb_plot_set += 2;
+      }
+      if (continuous_parametric_process[i]->restoration_weight) {
+        nb_plot_set += 2;
+      }
+    }
+
+    // seq is tested before seq->type[0] is read: this function is also
+    // called without data (seq == NULL)
+    if ((continuous_parametric_process[i]) && ((continuous_parametric_process[i]->ident == LINEAR_MODEL) ||
+         ((continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL) &&
+          (seq != NULL) && (seq->type[0] == STATE)))) {
+      nb_plot_set += nb_state;
+    }
+  }
+
+  // construction of the plots
+
+  plot_set = new MultiPlotSet(nb_plot_set , nb_output_process + 1);
+  plot_set->border = "15 lw 0";
+
+  if ((seq != NULL) && (seq->type[0] == STATE)) {
+    characteristics = seq->characteristics[0];
+    length_distribution = seq->length_distribution;
+  }
+  else {
+    characteristics = NULL;
+  }
+
+  index = 0;
+  plot_set->variable_nb_viewpoint[0] = 0;
+  state_process->plotable_write(*plot_set , index , 0 , NULL , NULL , characteristics ,
+                                length_distribution , forward);
+
+  if (seq) {
+    length_distribution = seq->length_distribution;
+  }
+
+  for (i = 0;i < nb_output_process;i++) {
+    if (seq != NULL) {
+      switch (seq->type[0]) {
+      case STATE :
+        variable = i + 1;
+        break;
+      default :
+        variable = i;
+        break;
+      }
+
+      if ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident == LINEAR_MODEL)) {
+        seq->linear_model_plotable_write(*plot_set , index , variable , continuous_parametric_process[i]);
+      }
+      else if ((continuous_parametric_process[i]) && (continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL)) {
+        if (seq->type[0] == STATE) {
+          seq->autoregressive_model_plotable_write(*plot_set , index , variable , continuous_parametric_process[i]);
+        }
+      }
+
+      else {
+        if (seq->observation_distribution) {
+          observation_dist = seq->observation_distribution[variable];
+        }
+        marginal_dist = seq->marginal_distribution[variable];
+
+        if (seq->observation_histogram) {
+          observation_histo = seq->observation_histogram[variable];
+        }
+        marginal_histo = seq->marginal_histogram[variable];
+
+        characteristics = seq->characteristics[variable];
+      }
+    }
+
+    if (categorical_process[i]) {
+      // NOTE(review): the process is written as variable i + 1 but the
+      // viewpoint counter that is reset is [i]; for i == 0 this resets the
+      // state process counter - confirm whether [i + 1] is intended
+      plot_set->variable_nb_viewpoint[i] = 0;
+      categorical_process[i]->plotable_write(*plot_set , index , i + 1 , observation_dist ,
+                                             marginal_dist , characteristics ,
+                                             length_distribution);
+    }
+    else if (discrete_parametric_process[i]) {
+      discrete_parametric_process[i]->plotable_write(*plot_set , index , i + 1 , observation_dist ,
+                                                     marginal_dist);
+    }
+    else if ((continuous_parametric_process[i]->ident != LINEAR_MODEL) &&
+             (continuous_parametric_process[i]->ident != AUTOREGRESSIVE_MODEL)) {
+      continuous_parametric_process[i]->plotable_write(*plot_set , index , i + 1 ,
+                                                       observation_histo , observation_dist ,
+                                                       marginal_histo , marginal_dist);
+    }
+  }
+
+  return plot_set;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a SemiMarkov object.
+ *
+ *  Delegates to get_plotable(semi_markov_data).
+ *
+ *  \return MultiPlotSet object.
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* SemiMarkov::get_plotable() const
+
+{
+  return get_plotable(semi_markov_data);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the number of parameters of a SemiMarkov object.
+ *
+ *  Sum of the number of parameters of the underlying semi-Markov chain and
+ *  of each observation process.
+ *
+ *  \param[in] min_probability minimum probability.
+ *
+ *  \return                    number of parameters.
+ */
+/*--------------------------------------------------------------*/
+
+int SemiMarkov::nb_parameter_computation(double min_probability) const
+
+{
+  int i;
+  int nb_parameter = SemiMarkovChain::nb_parameter_computation(min_probability);
+
+
+  for (i = 0;i < nb_output_process;i++) {
+    // min_probability is only forwarded to the categorical processes
+    if (categorical_process[i]) {
+      nb_parameter += categorical_process[i]->nb_parameter_computation(min_probability);
+    }
+    else if (discrete_parametric_process[i]) {
+      nb_parameter += discrete_parametric_process[i]->nb_parameter_computation();
+    }
+    else {
+      nb_parameter += continuous_parametric_process[i]->nb_parameter_computation();
+    }
+  }
+
+  return nb_parameter;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of an adaptative penalty.
+ *
+ *  Each group of parameters (transition row, sojourn time distribution,
+ *  observation distribution of a state) contributes its number of free
+ *  parameters times the log of a sample size: an expected size
+ *  (memory or state marginal probability times the cumulative length)
+ *  for hidden models, an observed count otherwise.
+ *  Returns 0 when no data are attached to the model.
+ *
+ *  \param[in] hidden          flag hidden model,
+ *  \param[in] min_probability minimum probability.
+ *
+ *  \return                    adaptative penalty.
+ */
+/*--------------------------------------------------------------*/
+
+double SemiMarkov::penalty_computation(bool hidden , double min_probability) const
+
+{
+  int i , j , k;
+  int nb_parameter , sample_size;
+  double sum , *memory , *state_marginal;
+  double penalty = 0.;
+
+
+  if (semi_markov_data) {
+    if (hidden) {
+      memory = memory_computation();
+
+      state_marginal = new double[nb_state];
+
+      switch (type) {
+
+      case ORDINARY : {
+        // normalization of the memory distribution
+        // NOTE(review): sum is 0 when length->nb_value <= 2 - division not guarded
+        sum = 0.;
+        for (i = 0;i < state_process->length->nb_value - 2;i++) {
+          sum += (1. - state_process->length->cumul[i + 1]);
+        }
+        for (i = 0;i < nb_state;i++) {
+          memory[i] /= sum;
+        }
+
+        // state marginal distribution: index_value points weighted by the
+        // survivor function of the sequence length, then normalized
+        for (i = 0;i < nb_state;i++) {
+          state_marginal[i] = 0.;
+        }
+        for (i = 0;i < state_process->length->nb_value - 1;i++) {
+          for (j = 0;j < nb_state;j++) {
+            state_marginal[j] += state_process->index_value->point[j][i] *
+                                 (1. - state_process->length->cumul[i]);
+          }
+        }
+
+        sum = 0.;
+        for (i = 0;i < nb_state;i++) {
+          sum += state_marginal[i];
+        }
+        for (i = 0;i < nb_state;i++) {
+          state_marginal[i] /= sum;
+        }
+        break;
+      }
+
+      case EQUILIBRIUM : {
+        // the initial probabilities are used as state marginal distribution;
+        // memory is left as returned by memory_computation()
+        for (i = 0;i < nb_state;i++) {
+          state_marginal[i] = initial[i];
+        }
+        break;
+      }
+      }
+    }
+
+    // transition probabilities: one group per row of the transition matrix
+
+    for (i = 0;i < nb_state;i++) {
+      nb_parameter = 0;
+      if (!hidden) {
+        sample_size = 0;
+      }
+      for (j = 0;j < nb_state;j++) {
+        if (transition[i][j] > min_probability) {
+          nb_parameter++;
+          if (!hidden) {
+            sample_size += semi_markov_data->chain_data->transition[i][j];
+          }
+        }
+      }
+
+      // the probabilities of a row sum to one
+      nb_parameter--;
+
+      if (nb_parameter > 0) {
+        if (hidden) {
+          if (memory[i] > 0.) {
+            penalty += nb_parameter * log(memory[i] * semi_markov_data->cumul_length);
+          }
+        }
+
+        else {
+          if (sample_size > 0) {
+            penalty += nb_parameter * log((double)sample_size);
+          }
+        }
+      }
+    }
+
+    // state occupancy distributions (semi-Markovian states only)
+
+    for (i = 0;i < nb_state;i++) {
+      if (sojourn_type[i] == SEMI_MARKOVIAN) {
+        nb_parameter = state_process->sojourn_time[i]->nb_parameter_computation();
+        if (state_process->sojourn_time[i]->inf_bound == 1) {
+          nb_parameter--;
+        }
+
+        // NOTE(review): unlike the transition block above, the log() arguments
+        // below and in the observation blocks are not tested against 0
+        if (hidden) {
+          penalty += nb_parameter * log(state_marginal[i] * semi_markov_data->cumul_length);
+        }
+        else {
+          penalty += nb_parameter *
+                     log((double)semi_markov_data->marginal_distribution[0]->frequency[i]);
+        }
+      }
+    }
+
+    // observation distributions: one group per state and per output process
+
+    for (i = 0;i < nb_output_process;i++) {
+      if (categorical_process[i]) {
+        for (j = 0;j < nb_state;j++) {
+          nb_parameter = 0;
+          for (k = 0;k < categorical_process[i]->nb_value;k++) {
+            if (categorical_process[i]->observation[j]->mass[k] > min_probability) {
+              nb_parameter++;
+            }
+          }
+
+          // the observation probabilities of a state sum to one
+          nb_parameter--;
+
+          if (nb_parameter > 0) {
+            if (hidden) {
+              penalty += nb_parameter * log(state_marginal[j] * semi_markov_data->cumul_length);
+            }
+            else {
+              penalty += nb_parameter *
+                         log((double)semi_markov_data->marginal_distribution[0]->frequency[j]);
+            }
+          }
+        }
+      }
+
+      else if (discrete_parametric_process[i]) {
+        for (j = 0;j < nb_state;j++) {
+          nb_parameter = discrete_parametric_process[i]->observation[j]->nb_parameter_computation();
+
+          if (hidden) {
+            penalty += nb_parameter * log(state_marginal[j] * semi_markov_data->cumul_length);
+          }
+          else {
+            penalty += nb_parameter *
+                       log((double)semi_markov_data->marginal_distribution[0]->frequency[j]);
+          }
+        }
+      }
+
+      else {
+        for (j = 0;j < nb_state;j++) {
+          nb_parameter = continuous_parametric_process[i]->observation[j]->nb_parameter_computation();
+
+          if (hidden) {
+            penalty += nb_parameter * log(state_marginal[j] * semi_markov_data->cumul_length);
+          }
+          else {
+            penalty += nb_parameter *
+                       log((double)semi_markov_data->marginal_distribution[0]->frequency[j]);
+          }
+        }
+      }
+    }
+
+    // both arrays are only allocated for hidden models
+    if (hidden) {
+      delete [] memory;
+      delete [] state_marginal;
+    }
+  }
+
+  return penalty;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Default constructor of the SemiMarkovData class.
+ *
+ *  No model or chain data attached, likelihoods set to D_INF,
+ *  sample entropy set to D_DEFAULT, per-sequence arrays set to NULL.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData::SemiMarkovData()
+
+{
+  semi_markov = NULL;
+  chain_data = NULL;
+
+  likelihood = D_INF;
+  restoration_likelihood = D_INF;
+  sample_entropy = D_DEFAULT;
+
+  posterior_probability = NULL;
+  posterior_state_probability = NULL;
+  entropy = NULL;
+  nb_state_sequence = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the SemiMarkovData class.
+ *
+ *  The sequences are built by the MarkovianSequences base constructor;
+ *  the SemiMarkovData members are initialized as in the default constructor.
+ *
+ *  \param[in] ilength_distribution sequence length frequency distribution,
+ *  \param[in] inb_variable         number of variables,
+ *  \param[in] itype                variable types,
+ *  \param[in] init_flag            flag initialization.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData::SemiMarkovData(const FrequencyDistribution &ilength_distribution , int inb_variable ,
+                               variable_nature *itype , bool init_flag)
+:MarkovianSequences(ilength_distribution , inb_variable , itype , init_flag)
+
+{
+  semi_markov = NULL;
+  chain_data = NULL;
+
+  likelihood = D_INF;
+  restoration_likelihood = D_INF;
+  sample_entropy = D_DEFAULT;
+
+  posterior_probability = NULL;
+  posterior_state_probability = NULL;
+  entropy = NULL;
+  nb_state_sequence = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a SemiMarkovData object from a MarkovianSequences object
+ *         adding a state variable.
+ *
+ *  \param[in] seq reference on a MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData::SemiMarkovData(const MarkovianSequences &seq)
+:MarkovianSequences(seq , ADD_STATE_VARIABLE , UNCHANGED)
+
+{
+  // per-sequence restoration results are not allocated
+  posterior_probability = posterior_state_probability = entropy = nb_state_sequence = NULL;
+
+  // nothing has been computed yet
+  likelihood = restoration_likelihood = D_INF;
+  sample_entropy = D_DEFAULT;
+
+  // neither chain counts nor model attached
+  chain_data = NULL;
+  semi_markov = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a SemiMarkovData object from a MarkovianSequences object.
+ *
+ *  The base class receives ADD_INITIAL_RUN when initial_run_flag is true and
+ *  REMOVE_INITIAL_RUN otherwise.
+ *
+ *  \param[in] seq              reference on a MarkovianSequences object,
+ *  \param[in] transform        type of transform (SEQUENCE_COPY/ADD_STATE_VARIABLE),
+ *  \param[in] initial_run_flag addition/removing of the initial run length frequency distributions.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData::SemiMarkovData(const MarkovianSequences &seq , sequence_transformation transform ,
+                               bool initial_run_flag)
+:MarkovianSequences(seq , transform , (initial_run_flag ? ADD_INITIAL_RUN : REMOVE_INITIAL_RUN))
+
+{
+  // per-sequence restoration results are not allocated
+  posterior_probability = posterior_state_probability = entropy = nb_state_sequence = NULL;
+
+  // nothing has been computed yet
+  likelihood = restoration_likelihood = D_INF;
+  sample_entropy = D_DEFAULT;
+
+  // neither chain counts nor model attached
+  chain_data = NULL;
+  semi_markov = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a SemiMarkovData object.
+ *
+ *  \param[in] seq        reference on a SemiMarkovData object,
+ *  \param[in] model_flag flag copy of the included SemiMarkov object.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovData::copy(const SemiMarkovData &seq , bool model_flag)
+
+{
+  int index;
+
+
+  // the model is duplicated only on request and only if the source has one
+  semi_markov = NULL;
+  if ((model_flag) && (seq.semi_markov)) {
+    semi_markov = new SemiMarkov(*(seq.semi_markov) , false);
+  }
+
+  chain_data = NULL;
+  if (seq.chain_data) {
+    chain_data = new ChainData(*(seq.chain_data));
+  }
+
+  likelihood = seq.likelihood;
+  restoration_likelihood = seq.restoration_likelihood;
+  sample_entropy = seq.sample_entropy;
+
+  // each optional array holds one value per sequence (nb_sequence entries)
+
+  posterior_probability = NULL;
+  if (seq.posterior_probability) {
+    posterior_probability = new double[nb_sequence];
+    for (index = 0;index < nb_sequence;index++) {
+      posterior_probability[index] = seq.posterior_probability[index];
+    }
+  }
+
+  posterior_state_probability = NULL;
+  if (seq.posterior_state_probability) {
+    posterior_state_probability = new double[nb_sequence];
+    for (index = 0;index < nb_sequence;index++) {
+      posterior_state_probability[index] = seq.posterior_state_probability[index];
+    }
+  }
+
+  entropy = NULL;
+  if (seq.entropy) {
+    entropy = new double[nb_sequence];
+    for (index = 0;index < nb_sequence;index++) {
+      entropy[index] = seq.entropy[index];
+    }
+  }
+
+  nb_state_sequence = NULL;
+  if (seq.nb_state_sequence) {
+    nb_state_sequence = new double[nb_sequence];
+    for (index = 0;index < nb_sequence;index++) {
+      nb_state_sequence[index] = seq.nb_state_sequence[index];
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the SemiMarkovData class.
+ *
+ *  Releases the attached model, the chain counts and the four
+ *  per-sequence arrays.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData::~SemiMarkovData()
+
+{
+  delete semi_markov;
+  delete chain_data;
+
+  delete [] posterior_probability;
+  delete [] posterior_state_probability;
+  delete [] entropy;
+  delete [] nb_state_sequence;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the SemiMarkovData class.
+ *
+ *  \param[in] seq reference on a SemiMarkovData object.
+ *
+ *  \return        SemiMarkovData object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData& SemiMarkovData::operator=(const SemiMarkovData &seq)
+
+{
+  if (&seq != this) {
+    // release what this class owns, then what the base classes own
+    delete semi_markov;
+    delete chain_data;
+
+    delete [] posterior_probability;
+    delete [] posterior_state_probability;
+    delete [] entropy;
+    delete [] nb_state_sequence;
+
+    remove();
+    Sequences::remove();
+
+    // rebuild from the base class outwards
+    Sequences::copy(seq);
+    MarkovianSequences::copy(seq);
+    copy(seq);
+  }
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of a frequency distribution.
+ *
+ *  The selected empirical distribution is returned together with the
+ *  corresponding theoretical distribution when the attached model provides one.
+ *  If no model is attached (semi_markov is NULL) or the selected process
+ *  is missing, only the frequency distribution is returned.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] histo_type frequency distribution type,
+ *  \param[in] variable   variable (1-based; >= 2 for OBSERVATION),
+ *  \param[in] value      state or observation.
+ *
+ *  \return               DiscreteDistributionData object (NULL on error).
+ */
+/*--------------------------------------------------------------*/
+
+DiscreteDistributionData* SemiMarkovData::extract(StatError &error , process_distribution histo_type ,
+                                                  int variable , int value) const
+
+{
+  bool status = true;
+  Distribution *pdist;
+  DiscreteParametric *pparam;
+  FrequencyDistribution *phisto;
+  DiscreteDistributionData *histo;
+  CategoricalSequenceProcess *process;
+
+
+  histo = NULL;
+  error.init();
+
+  phisto = NULL;
+
+  if (histo_type == OBSERVATION) {
+    if ((variable < 2) || (variable > nb_variable)) {
+      status = false;
+      error.update(STAT_error[STATR_VARIABLE_INDEX]);
+    }
+
+    else {
+      variable--;
+
+      if ((value < 0) || (value >= marginal_distribution[0]->nb_value)) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_STATE] << " " << value << " "
+                      << STAT_error[STATR_NOT_PRESENT];
+        error.update((error_message.str()).c_str());
+      }
+
+      else {
+        if (!observation_distribution[variable]) {
+          status = false;
+          ostringstream error_message;
+          error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": "
+                        << STAT_error[STATR_VARIABLE_TYPE];
+          error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]);
+        }
+
+        else {
+          phisto = observation_distribution[variable][value];
+
+          if (phisto->nb_element == 0) {
+            status = false;
+            error.update(STAT_error[STATR_EMPTY_SAMPLE]);
+          }
+        }
+      }
+    }
+  }
+
+  else {
+    if ((variable < 1) || (variable > nb_variable)) {
+      status = false;
+      error.update(STAT_error[STATR_VARIABLE_INDEX]);
+    }
+
+    else {
+      variable--;
+
+      if (!characteristics[variable]) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": "
+                      << SEQ_error[SEQR_CHARACTERISTICS_NOT_COMPUTED];
+        error.update((error_message.str()).c_str());
+      }
+
+      else if ((value < 0) || (value >= marginal_distribution[variable]->nb_value)) {
+        status = false;
+        ostringstream error_message;
+        error_message << STAT_label[variable == 0 ? STATL_STATE : STATL_OUTPUT] << " "
+                      << value << " " << STAT_error[STATR_NOT_PRESENT];
+        error.update((error_message.str()).c_str());
+      }
+
+      if (status) {
+        switch (histo_type) {
+
+        case FIRST_OCCURRENCE : {
+          phisto = characteristics[variable]->first_occurrence[value];
+          break;
+        }
+
+        case RECURRENCE_TIME : {
+          phisto = characteristics[variable]->recurrence_time[value];
+          break;
+        }
+
+        case SOJOURN_TIME : {
+          phisto = characteristics[variable]->sojourn_time[value];
+          break;
+        }
+
+        case INITIAL_RUN : {
+          if (characteristics[variable]->initial_run) {
+            phisto = characteristics[variable]->initial_run[value];
+          }
+          else {
+            status = false;
+            error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+          }
+          break;
+        }
+
+        case FINAL_RUN : {
+          phisto = characteristics[variable]->final_run[value];
+          break;
+        }
+
+        case NB_RUN : {
+          phisto = characteristics[variable]->nb_run[value];
+          break;
+        }
+
+        case NB_OCCURRENCE : {
+          phisto = characteristics[variable]->nb_occurrence[value];
+          break;
+        }
+
+        // any other distribution type has no frequency distribution here:
+        // report it instead of dereferencing a NULL phisto below
+        default : {
+          status = false;
+          error.update(STAT_error[STATR_NON_EXISTING_FREQUENCY_DISTRIBUTION]);
+          break;
+        }
+        }
+
+        if ((phisto) && (phisto->nb_element == 0)) {
+          status = false;
+          error.update(STAT_error[STATR_EMPTY_SAMPLE]);
+        }
+      }
+    }
+  }
+
+  if (status) {
+    pdist = NULL;
+    pparam = NULL;
+
+    // the theoretical distribution can only be looked up when a model is attached
+    if (semi_markov) {
+      if (variable == 0) {
+        process = semi_markov->state_process;
+      }
+      else {
+        process = semi_markov->categorical_process[variable - 1];
+      }
+
+      switch (histo_type) {
+
+      case OBSERVATION : {
+        if (semi_markov->categorical_process[variable - 1]) {
+          pdist = semi_markov->categorical_process[variable - 1]->observation[value];
+        }
+        else if (semi_markov->discrete_parametric_process[variable - 1]) {
+          pparam = semi_markov->discrete_parametric_process[variable - 1]->observation[value];
+        }
+        break;
+      }
+
+      case FIRST_OCCURRENCE : {
+        if (process) {
+          pdist = process->first_occurrence[value];
+        }
+        break;
+      }
+
+      case RECURRENCE_TIME : {
+        if (process) {
+          pdist = process->recurrence_time[value];
+        }
+        break;
+      }
+
+      case SOJOURN_TIME : {
+        if (process) {
+          pparam = process->sojourn_time[value];
+        }
+        break;
+      }
+
+      // initial and final runs are both compared with the forward sojourn time
+      // distribution, which exists only for the state variable
+      case INITIAL_RUN : {
+        if ((variable == 0) && (semi_markov->forward)) {
+          pdist = semi_markov->forward[value];
+        }
+        break;
+      }
+
+      case FINAL_RUN : {
+        if ((variable == 0) && (semi_markov->forward)) {
+          pdist = semi_markov->forward[value];
+        }
+        break;
+      }
+
+      case NB_RUN : {
+        if (process) {
+          pdist = process->nb_run[value];
+        }
+        break;
+      }
+
+      case NB_OCCURRENCE : {
+        if (process) {
+          pdist = process->nb_occurrence[value];
+        }
+        break;
+      }
+
+      default : {
+        break;
+      }
+      }
+    }
+
+    // pparam may be NULL here, as in the original code path where no
+    // theoretical distribution is found
+    if (pdist) {
+      histo = new DiscreteDistributionData(*phisto , pdist);
+    }
+    else {
+      histo = new DiscreteDistributionData(*phisto , pparam);
+    }
+  }
+
+  return histo;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a SemiMarkovData object transforming the implicit index parameters in
+ *         explicit index parameters.
+ *
+ *  \param[in] error reference on a StatError object.
+ *
+ *  \return          SemiMarkovData object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData* SemiMarkovData::explicit_index_parameter(StatError &error) const
+
+{
+  error.init();
+
+  // explicit index parameters are already present: nothing to transform
+  if (index_parameter) {
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+    return NULL;
+  }
+
+  return new SemiMarkovData(*this , true , EXPLICIT_INDEX_PARAMETER);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Removing of the index parameters.
+ *
+ *  \param[in] error reference on a StatError object.
+ *
+ *  \return          SemiMarkovData object (NULL if there is no index parameter).
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovData* SemiMarkovData::remove_index_parameter(StatError &error) const
+
+{
+  error.init();
+
+  // no index parameter to remove
+  if (!index_parameter) {
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+    return NULL;
+  }
+
+  return new SemiMarkovData(*this , true , REMOVE_INDEX_PARAMETER);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the auxiliary variables corresponding to
+ *         the restored state sequences.
+ *
+ *  \param[in] error reference on a StatError object.
+ *
+ *  \return          MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* SemiMarkovData::build_auxiliary_variable(StatError &error) const
+
+{
+  bool status = true , parametric = false;
+  int i;
+
+
+  error.init();
+
+  // the first variable must be the restored state variable
+  if (type[0] != STATE) {
+    status = false;
+    ostringstream error_message;
+    error_message << STAT_label[STATL_VARIABLE] << " 1: "
+                  << STAT_error[STATR_VARIABLE_TYPE];
+    error.correction_update((error_message.str()).c_str() , STAT_variable_word[STATE]);
+  }
+
+  // at least one observation process must be parametric (discrete or continuous)
+  for (i = 0;(i < semi_markov->nb_output_process) && (!parametric);i++) {
+    if (((semi_markov->discrete_parametric_process) && (semi_markov->discrete_parametric_process[i])) ||
+        ((semi_markov->continuous_parametric_process) && (semi_markov->continuous_parametric_process[i]))) {
+      parametric = true;
+    }
+  }
+
+  if (!parametric) {
+    status = false;
+    error.update(SEQ_error[SEQR_PARAMETRIC_PROCESS]);
+  }
+
+  if (!status) {
+    return NULL;
+  }
+
+  return MarkovianSequences::build_auxiliary_variable(semi_markov->discrete_parametric_process ,
+                                                      semi_markov->continuous_parametric_process);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Building of residual sequences on the basis of restored state sequences.
+ *
+ *  \param[in] error reference on a StatError object.
+ *
+ *  \return          MarkovianSequences object.
+ */
+/*--------------------------------------------------------------*/
+
+MarkovianSequences* SemiMarkovData::residual_sequences(StatError &error) const
+
+{
+  error.init();
+
+  if (type[0] == STATE) {
+    return MarkovianSequences::residual_sequences(semi_markov->categorical_process ,
+                                                  semi_markov->discrete_parametric_process ,
+                                                  semi_markov->continuous_parametric_process);
+  }
+
+  // the first variable is not the restored state variable
+  ostringstream error_message;
+  error_message << STAT_label[STATL_VARIABLE] << " 1: "
+                << STAT_error[STATR_VARIABLE_TYPE];
+  error.correction_update((error_message.str()).c_str() , STAT_variable_word[STATE]);
+
+  return NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a SemiMarkovData object.
+ *
+ *  Delegates to the attached model; nothing is written when no model is attached.
+ *
+ *  \param[in,out] os         stream,
+ *  \param[in]     exhaustive flag detail level.
+ *
+ *  \return                   the stream os.
+ */
+/*--------------------------------------------------------------*/
+
+ostream& SemiMarkovData::ascii_write(ostream &os , bool exhaustive) const
+
+{
+  if (!semi_markov) {
+    return os;
+  }
+
+  bool hidden = CategoricalSequenceProcess::test_hidden(semi_markov->nb_output_process , semi_markov->categorical_process);
+
+  semi_markov->ascii_write(os , this , exhaustive , false , hidden);
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a SemiMarkovData object in a file.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] path       file path,
+ *  \param[in] exhaustive flag detail level.
+ *
+ *  \return               error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkovData::ascii_write(StatError &error , const string path ,
+                                 bool exhaustive) const
+
+{
+  bool status = false;
+
+
+  // always reset the error object so that a failure caused by a missing model
+  // is not reported with a stale message from a previous call
+  error.init();
+
+  if (semi_markov) {
+    ofstream out_file(path.c_str());
+
+    if (!out_file) {
+      status = false;
+      error.update(STAT_error[STATR_FILE_NAME]);
+    }
+
+    else {
+      status = true;
+      semi_markov->ascii_write(out_file , this , exhaustive , true ,
+                               CategoricalSequenceProcess::test_hidden(semi_markov->nb_output_process , semi_markov->categorical_process));
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of the sequences of a SemiMarkovData object.
+ *
+ *  Writes the MarkovianSequences summary followed by the sequences with the
+ *  per-sequence restoration results.
+ *
+ *  \param[in,out] os         stream,
+ *  \param[in]     format     format (line/column),
+ *  \param[in]     exhaustive flag detail level.
+ *
+ *  \return                   the stream os.
+ */
+/*--------------------------------------------------------------*/
+
+ostream& SemiMarkovData::ascii_data_write(ostream &os , output_sequence_format format ,
+                                          bool exhaustive) const
+
+{
+  MarkovianSequences::ascii_write(os , exhaustive , false);
+  ascii_print(os , format , false , posterior_probability , entropy , nb_state_sequence , posterior_state_probability);
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of the sequences of a SemiMarkovData object in a string.
+ *
+ *  \param[in] format     format (line/column),
+ *  \param[in] exhaustive flag detail level.
+ *
+ *  \return               string.
+ */
+/*--------------------------------------------------------------*/
+
+string SemiMarkovData::ascii_data_write(output_sequence_format format , bool exhaustive) const
+
+{
+  ostringstream oss;
+
+
+  ascii_data_write(oss , format , exhaustive);
+
+  return oss.str();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a SemiMarkovData object in a file.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] path       file path,
+ *  \param[in] format     format (line/column),
+ *  \param[in] exhaustive flag detail level.
+ *
+ *  \return               error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkovData::ascii_data_write(StatError &error , const string path ,
+                                      output_sequence_format format , bool exhaustive) const
+
+{
+  bool status = false;
+  ofstream out_file(path.c_str());
+
+
+  error.init();
+
+  if (!out_file) {
+    status = false;
+    error.update(STAT_error[STATR_FILE_NAME]);
+  }
+
+  else {
+    status = true;
+
+    // NOTE(review): format is an output_sequence_format value compared with the
+    // character literal 'a'; this looks like a leftover of a char-based format
+    // code - confirm which enumerator is meant before changing it
+    if (format != 'a') {
+      MarkovianSequences::ascii_write(out_file , exhaustive , true);
+    }
+    ascii_print(out_file , format , true , posterior_probability , entropy , nb_state_sequence , posterior_state_probability);
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a SemiMarkovData object in a file at the spreadsheet format.
+ *
+ *  \param[in] error reference on a StatError object,
+ *  \param[in] path  file path.
+ *
+ *  \return          error status (false when no model is attached or
+ *                   the file cannot be opened).
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkovData::spreadsheet_write(StatError &error , const string path) const
+
+{
+  bool status = false;
+
+
+  // always reset the error object so that a failure caused by a missing model
+  // is not reported with a stale message from a previous call
+  error.init();
+
+  if (semi_markov) {
+    ofstream out_file(path.c_str());
+
+    if (!out_file) {
+      status = false;
+      error.update(STAT_error[STATR_FILE_NAME]);
+    }
+
+    else {
+      status = true;
+      semi_markov->spreadsheet_write(out_file , this ,
+                                     CategoricalSequenceProcess::test_hidden(semi_markov->nb_output_process , semi_markov->categorical_process));
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a SemiMarkovData object using Gnuplot.
+ *
+ *  \param[in] error  reference on a StatError object,
+ *  \param[in] prefix file prefix,
+ *  \param[in] title  figure title.
+ *
+ *  \return           error status (false when no model is attached or
+ *                    the model plot could not be written).
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkovData::plot_write(StatError &error , const char *prefix ,
+                                const char *title) const
+
+{
+  bool status = false;
+
+
+  // always reset the error object so that a failure caused by a missing model
+  // is not reported with a stale message from a previous call
+  error.init();
+
+  if (semi_markov) {
+    status = semi_markov->plot_write(prefix , title , this);
+
+    if (!status) {
+      error.update(STAT_error[STATR_FILE_PREFIX]);
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a SemiMarkovData object.
+ *
+ *  \return MultiPlotSet object (NULL when no model is attached).
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* SemiMarkovData::get_plotable() const
+
+{
+  MultiPlotSet *plot_set;
+
+
+  if (semi_markov) {
+    plot_set = semi_markov->get_plotable(this);
+  }
+  else {
+    plot_set = NULL;
+  }
+
+  return plot_set;
+}
+
+
+};  // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/semi_markov.h b/src/cpp/sequence_analysis/semi_markov.h
new file mode 100644
index 0000000..e23afb4
--- /dev/null
+++ b/src/cpp/sequence_analysis/semi_markov.h
@@ -0,0 +1,379 @@
+/* -*-c++-*-
+ *  ----------------------------------------------------------------------------
+ *
+ *       StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ *       Copyright 1995-2019 CIRAD AGAP
+ *
+ *       File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ *       $Source$
+ *       $Id$
+ *
+ *       Forum for StructureAnalysis developers:
+ *
+ *  ----------------------------------------------------------------------------
+ *
+ *                      GNU General Public Licence
+ *
+ *       This program is free software; you can redistribute it and/or
+ *       modify it under the terms of the GNU General Public License as
+ *       published by the Free Software Foundation; either version 2 of
+ *       the License, or (at your option) any later version.
+ *
+ *       This program is distributed in the hope that it will be useful,
+ *       but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *       GNU General Public License for more details.
+ *
+ *       You should have received a copy of the GNU General Public
+ *       License along with this program; see the file COPYING. If not,
+ *       write to the Free Software Foundation, Inc., 59
+ *       Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *  ----------------------------------------------------------------------------
+ */
+
+
+
+#ifndef SEMI_MARKOV_H
+#define SEMI_MARKOV_H
+
+
+#include "sequences.h"
+
+
+namespace sequence_analysis {
+
+
+
+/****************************************************************
+ *
+ *  Constants
+ */
+
+
+  const int LEAVE_LENGTH = 10000;        // maximum length for the computation of the probability of
+                                         // leaving definitively a state
+
+  const double OCCUPANCY_LIKELIHOOD_DIFF = 1.e-5;  // threshold for stopping the EM iterations
+  const int OCCUPANCY_NB_ITER = 10000;   // maximum number of EM iterations
+  const int OCCUPANCY_COEFF = 10;        // rounding coefficient for the state occupancy distribution estimator
+
+  // nature of the sojourn in a state (one entry per state in SemiMarkovChain::sojourn_type)
+  enum state_sojourn_type {
+    MARKOVIAN ,
+    SEMI_MARKOVIAN
+  };
+
+
+
+/****************************************************************
+ *
+ *  Class definition
+ */
+
+
+  /// \brief Semi-Markov chain
+
+  class SemiMarkovChain : public stat_tool::Chain {
+
+  public :
+
+    state_sojourn_type *sojourn_type;  ///< MARKOVIAN/SEMI_MARKOVIAN
+    CategoricalSequenceProcess *state_process;  ///< state process
+    Forward **forward;     ///< forward sojourn time distributions
+
+    // copy/release helpers used by the constructors, destructor and assignment
+    void copy(const SemiMarkovChain &smarkov , int param = stat_tool::I_DEFAULT);
+    void remove();
+
+    // constructors, destructor and assignment
+    SemiMarkovChain();
+    SemiMarkovChain(stat_tool::process_type itype , int inb_state);
+    SemiMarkovChain(const stat_tool::Chain *pchain , const CategoricalSequenceProcess *poccupancy);
+    SemiMarkovChain(const SemiMarkovChain &smarkov , int param = stat_tool::I_DEFAULT);
+    ~SemiMarkovChain();
+    SemiMarkovChain& operator=(const SemiMarkovChain &smarkov);
+
+    int nb_parameter_computation(double min_probability = 0.) const;
+
+    void initial_probability_computation();
+
+    // computation of the characteristic distributions of the state process
+    void index_state_distribution();
+    double* memory_computation() const;
+    void state_no_occurrence_probability(int state , double increment = LEAVE_INCREMENT);
+    void state_first_occurrence_distribution(int state , int min_nb_value = 1 ,
+                                             double cumul_threshold = stat_tool::CUMUL_THRESHOLD);
+    void state_leave_probability(int state , double increment = LEAVE_INCREMENT);
+    void state_recurrence_time_distribution(int state , int min_nb_value = 1 ,
+                                            double cumul_threshold = OCCUPANCY_THRESHOLD);
+    void state_nb_pattern_mixture(int state , stat_tool::count_pattern pattern);
+  };
+
+
+  class SemiMarkovData;
+
+  /**
+     \class SemiMarkov
+     \brief Semi-Markov processes with an output process (conditionally independent
+            given states).
+            Emission distributions are represented by
+            CategoricalSequenceProcess **categorical_process
+            DiscreteParametricProcess **discrete_parametric_process and
+            ContinuousParametricProcess **continuous_parametric_process
+            categorical_process[0] represents the semi-Markov chain (states)
+            for v > 0 among categorical_process[v], discrete_parametric_process[v]
+            and continuous_parametric_process[v], exactly one is non NULL, representing
+            the nature of vth observed process.
+  */
+  class SemiMarkov : public stat_tool::StatInterface , protected SemiMarkovChain {
+
+    friend class MarkovianSequences;
+    friend class SemiMarkovIterator;
+    friend class SemiMarkovData;
+
+    // stream output: writes the model together with its attached data (semi_markov_data)
+    friend std::ostream& operator<<(std::ostream &os , const SemiMarkov &smarkov)
+    { return smarkov.ascii_write(os , smarkov.semi_markov_data); }
+
+  protected :
+
+    int nb_iterator;       ///< number of iterators pointing on the SemiMarkov object
+    SemiMarkovData *semi_markov_data;  ///< pointer on a SemiMarkovData object
+    int nb_output_process;  ///< number of observation processes
+    CategoricalSequenceProcess **categorical_process;  ///< categorical observation processes
+    stat_tool::DiscreteParametricProcess **discrete_parametric_process;  ///< discrete parametric observation processes
+    stat_tool::ContinuousParametricProcess **continuous_parametric_process;  ///< continuous parametric observation processes
+
+    // constructors reserved to the class itself, derived classes and friends
+    SemiMarkov(const stat_tool::Chain *pchain , const CategoricalSequenceProcess *poccupancy ,
+               int inb_output_process , stat_tool::CategoricalProcess **pobservation ,
+               int length , bool counting_flag);
+    SemiMarkov(const stat_tool::Chain *pchain , const CategoricalSequenceProcess *poccupancy ,
+               int inb_output_process , stat_tool::CategoricalProcess **categorical_observation ,
+               stat_tool::DiscreteParametricProcess **discrete_parametric_observation ,
+               stat_tool::ContinuousParametricProcess **continuous_parametric_observation ,
+               int length , bool counting_flag);
+
+    void copy(const SemiMarkov &smarkov , bool data_flag = true ,
+              int param = stat_tool::I_DEFAULT);
+    void remove();
+
+    // output of the model together with a given data set (seq may differ from semi_markov_data)
+    std::ostream& ascii_write(std::ostream &os , const SemiMarkovData *seq ,
+                              bool exhaustive = false , bool file_flag = false ,
+                              bool hidden = false) const;
+    std::ostream& spreadsheet_write(std::ostream &os , const SemiMarkovData *seq ,
+                                    bool hidden = false) const;
+    bool plot_write(const char *prefix , const char *title ,
+                    const SemiMarkovData *seq) const;
+    stat_tool::MultiPlotSet* get_plotable(const SemiMarkovData *seq) const;
+
+    int nb_parameter_computation(double min_probability = 0.) const;
+    double penalty_computation(bool hidden , double min_probability = 0.) const;
+
+    // computation of the characteristic distributions of an observation process
+    void index_output_distribution(int variable);
+    void output_no_occurrence_probability(int variable , int output ,
+                                          double increment = LEAVE_INCREMENT);
+    void output_first_occurrence_distribution(int variable , int output ,
+                                              int min_nb_value = 1 ,
+                                              double cumul_threshold = stat_tool::CUMUL_THRESHOLD);
+    void output_leave_probability(const double *memory ,
+                                  int variable , int output ,
+                                  double increment = LEAVE_INCREMENT);
+    void output_recurrence_time_distribution(const double *memory , int variable ,
+                                             int output , int min_nb_value = 1 ,
+                                             double cumul_threshold = stat_tool::CUMUL_THRESHOLD);
+    void output_sojourn_time_distribution(const double *memory , int variable ,
+                                          int output , int min_nb_value = 1 ,
+                                          double cumul_threshold = stat_tool::CUMUL_THRESHOLD);
+    void output_nb_run_mixture(int variable , int output);
+    void output_nb_occurrence_mixture(int variable , int output);
+
+  public :
+
+    // constructors, destructor and assignment
+    SemiMarkov();
+    SemiMarkov(stat_tool::process_type itype , int inb_state , int inb_output_process , int *nb_value);
+    SemiMarkov(const stat_tool::Chain *pchain , const CategoricalSequenceProcess *poccupancy ,
+               const stat_tool::CategoricalProcess *pobservation , int length ,
+               bool counting_flag);
+    SemiMarkov(const SemiMarkov &smarkov , bool data_flag = true ,
+               int param = stat_tool::I_DEFAULT);
+    void conditional_delete();
+    ~SemiMarkov();
+    SemiMarkov& operator=(const SemiMarkov &smarkov);
+
+    // extraction of distributions and of the attached data
+    DiscreteParametricModel* extract(stat_tool::StatError &error ,
+                                     stat_tool::process_distribution dist_type ,
+                                     int variable , int value) const;
+    DiscreteParametricModel* extract(stat_tool::StatError &error , int state ,
+                                     stat_tool::process_distribution histo_type = FINAL_RUN) const;
+    SemiMarkovData* extract_data(stat_tool::StatError &error) const;
+
+    SemiMarkov* thresholding(double min_probability = MIN_PROBABILITY) const;
+
+    static SemiMarkov* ascii_read(stat_tool::StatError &error , const std::string path ,
+                                  int length = DEFAULT_LENGTH , bool counting_flag = true ,
+                                  double cumul_threshold = OCCUPANCY_THRESHOLD);
+
+    // output (the overloads taking only a stream/path take no data argument)
+    std::ostream& line_write(std::ostream &os) const;
+
+    std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const;
+    bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const;
+    bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const;
+    bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const;
+    stat_tool::MultiPlotSet* get_plotable() const;
+
+    void characteristic_computation(int length , bool counting_flag , int variable = stat_tool::I_DEFAULT);
+    void characteristic_computation(const SemiMarkovData &seq , bool counting_flag ,
+                                    int variable = stat_tool::I_DEFAULT , bool length_flag = true);
+
+    double likelihood_computation(const MarkovianSequences &seq , int index) const;
+    double likelihood_computation(const SemiMarkovData &seq) const;
+
+    // simulation of sequences
+    SemiMarkovData* simulation(stat_tool::StatError &error , const FrequencyDistribution &hlength ,
+                               bool counting_flag = true , bool divergence_flag = false) const;
+    SemiMarkovData* simulation(stat_tool::StatError &error , int nb_sequence , int length ,
+                               bool counting_flag = true) const;
+    SemiMarkovData* simulation(stat_tool::StatError &error , int nb_sequence ,
+                               const MarkovianSequences &iseq , bool counting_flag = true) const;
+
+    // divergences between models
+    stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model ,
+                                                      const SemiMarkov **ismarkov ,
+                                                      stat_tool::FrequencyDistribution **hlength ,
+                                                      const std::string path = "") const;
+    stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model ,
+                                                      const SemiMarkov **smarkov , int nb_sequence ,
+                                                      int length , const std::string path = "") const;
+    stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model ,
+                                                      const SemiMarkov **smarkov , int nb_sequence ,
+                                                      const MarkovianSequences **seq , const std::string path = "") const;
+
+    // class member access
+
+    int get_nb_iterator() const { return nb_iterator; }
+    SemiMarkovData* get_semi_markov_data() const { return semi_markov_data; }
+    int get_nb_output_process() const { return nb_output_process; }
+    CategoricalSequenceProcess** get_categorical_process()
+    const { return categorical_process; }
+    CategoricalSequenceProcess* get_categorical_process(int variable)
+    const { return categorical_process[variable]; }
+    stat_tool::DiscreteParametricProcess** get_discrete_parametric_process() const
+    { return discrete_parametric_process; }
+    stat_tool::DiscreteParametricProcess* get_discrete_parametric_process(int variable) const
+    { return discrete_parametric_process[variable]; }
+    stat_tool::ContinuousParametricProcess** get_continuous_parametric_process() const
+    { return continuous_parametric_process; }
+    stat_tool::ContinuousParametricProcess* get_continuous_parametric_process(int variable) const
+    { return continuous_parametric_process[variable]; }
+  };
+
+
+  /// \brief Semi-Markov chain iterator for asynchronous simulation
+
+  class SemiMarkovIterator {
+
+  private :
+
+    SemiMarkov *semi_markov;  ///< pointer on a SemiMarkov object
+    int state;             ///< state
+    int occupancy;         ///< state occupancy
+    int counter;           ///< counter
+
+    void copy(const SemiMarkovIterator &it);
+
+  public :
+
+    SemiMarkovIterator(SemiMarkov *ismarkov);
+    // copy constructor: delegates to copy()
+    SemiMarkovIterator(const SemiMarkovIterator &iter)
+    { copy(iter); }
+    ~SemiMarkovIterator();
+    SemiMarkovIterator& operator=(const SemiMarkovIterator &iter);
+
+    bool simulation(int **int_seq , int length = 1 , bool initialization = false);
+    int** simulation(int length = 1 , bool initialization = false);
+
+    // class member access
+
+    SemiMarkov* get_semi_markov() const { return semi_markov; }
+    int get_state() const { return state; }
+    int get_occupancy() const { return occupancy; }
+    int get_counter() const { return counter; }
+    // number of observation processes + 1, or 0 when no model is attached
+    int get_nb_variable() const { return (semi_markov ? semi_markov->nb_output_process + 1 : 0); }
+  };
+
+
+  /// \brief Data structure corresponding to a semi-Markov chain
+
+  class SemiMarkovData : public MarkovianSequences {
+
+    friend class MarkovianSequences;
+    friend class SemiMarkov;
+    friend class HiddenSemiMarkov;
+
+    friend std::ostream& operator<<(std::ostream &os , const SemiMarkovData &seq)
+    { return seq.ascii_write(os , false); }
+
+  private :
+
+    SemiMarkov *semi_markov;  ///< pointer on a SemiMarkov object
+    stat_tool::ChainData *chain_data;  ///< initial states and transition counts
+    double likelihood;     ///< log-likelihood for the observed sequences
+    double restoration_likelihood;  ///< log-likelihood for the restored state sequences
+    double sample_entropy;  ///< entropy of the state sequences for the sample
+    double *posterior_probability;  ///< posterior probabilities of the most probable state sequences
+    double *posterior_state_probability;  ///< posterior probabilities of the most probable initial state
+    double *entropy;       ///< entropies of the state sequences
+    double *nb_state_sequence;  ///< numbers of state sequences
+
+    void copy(const SemiMarkovData &seq , bool model_flag = true);
+
+  public :
+
+    SemiMarkovData();
+    SemiMarkovData(const stat_tool::FrequencyDistribution &ihlength , int inb_variable ,
+                   stat_tool::variable_nature *itype , bool init_flag = false);
+    SemiMarkovData(const MarkovianSequences &seq);
+    SemiMarkovData(const MarkovianSequences &seq , sequence_transformation transform , bool initial_run_flag);
+    SemiMarkovData(const SemiMarkovData &seq , bool model_flag = true ,
+                   sequence_transformation transform = SEQUENCE_COPY)
+    :MarkovianSequences(seq , transform) { copy(seq , model_flag); }
+    ~SemiMarkovData();
+    SemiMarkovData& operator=(const
SemiMarkovData &seq); + + DiscreteDistributionData* extract(stat_tool::StatError &error , + stat_tool::process_distribution histo_type , + int variable , int value) const; + SemiMarkovData* explicit_index_parameter(stat_tool::StatError &error) const; + SemiMarkovData* remove_index_parameter(stat_tool::StatError &error) const; + MarkovianSequences* build_auxiliary_variable(stat_tool::StatError &error) const; + MarkovianSequences* residual_sequences(stat_tool::StatError &error) const; + + std::ostream& ascii_data_write(std::ostream &os , output_sequence_format format = COLUMN , + bool exhaustive = false) const; + std::string ascii_data_write(output_sequence_format format = COLUMN , bool exhaustive = false) const; + bool ascii_data_write(stat_tool::StatError &error , const std::string path , + output_sequence_format format = COLUMN , bool exhaustive = false) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + void build_transition_count(const SemiMarkov *smarkov = NULL); + + // class member access + + SemiMarkov* get_semi_markov() const { return semi_markov; } + stat_tool::ChainData* get_chain_data() const { return chain_data; } + double get_likelihood() const { return likelihood; } + double get_restoration_likelihood() const { return restoration_likelihood; } + double get_sample_entropy() const { return sample_entropy; } + double get_posterior_probability(int index) const { return posterior_probability[index]; } + double get_state_posterior_probability(int index) const { return posterior_state_probability[index]; } + double get_entropy(int index) const { return entropy[index]; } + double 
get_nb_state_sequence(int index) const { return nb_state_sequence[index]; } + }; + + +}; // namespace sequence_analysis + + + +#endif diff --git a/src/cpp/sequence_analysis/sequence_characteristics.cpp b/src/cpp/sequence_analysis/sequence_characteristics.cpp new file mode 100644 index 0000000..799c506 --- /dev/null +++ b/src/cpp/sequence_analysis/sequence_characteristics.cpp @@ -0,0 +1,2416 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include + +#include "stat_tool/stat_label.h" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the SequenceCharacteristics class. + * + * \param[in] inb_value number of categories. + */ +/*--------------------------------------------------------------*/ + +SequenceCharacteristics::SequenceCharacteristics(int inb_value) + +{ + nb_value = inb_value; + + index_value = NULL; + explicit_index_value = NULL; + + first_occurrence = NULL; + recurrence_time = NULL; + sojourn_time = NULL; + initial_run = NULL; + final_run = NULL; + + nb_run = NULL; + nb_occurrence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the SequenceCharacteristics class adding/removing + * initial run frequency distributions. + * + * \param[in] characteristics reference on a SequenceCharacteristics object, + * \param[in] initial_run_flag flag construction of the initial run frequency distributions. 
+ */ +/*--------------------------------------------------------------*/ + +SequenceCharacteristics::SequenceCharacteristics(const SequenceCharacteristics &characteristics , + bool initial_run_flag) + +{ + int i; + + + nb_value = characteristics.nb_value; + + index_value = new Curves(*(characteristics.index_value)); + + if (characteristics.explicit_index_value) { + explicit_index_value = new Curves(*(characteristics.explicit_index_value)); + } + else { + explicit_index_value = NULL; + } + + first_occurrence = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + first_occurrence[i] = new FrequencyDistribution(*(characteristics.first_occurrence[i])); + } + + recurrence_time = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + recurrence_time[i] = new FrequencyDistribution(*(characteristics.recurrence_time[i])); + } + + sojourn_time = new FrequencyDistribution*[nb_value]; + + if (initial_run_flag) { + initial_run = new FrequencyDistribution*[nb_value]; + } + else { + initial_run = NULL; + } + + final_run = new FrequencyDistribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + if (((characteristics.initial_run) && (initial_run_flag)) || + ((!(characteristics.initial_run)) && (!initial_run_flag))) { + sojourn_time[i] = new FrequencyDistribution(*(characteristics.sojourn_time[i])); + final_run[i] = new FrequencyDistribution(*(characteristics.final_run[i])); + } + else { + sojourn_time[i] = NULL; + final_run[i] = NULL; + } + + if ((characteristics.initial_run) && (initial_run_flag)) { + initial_run[i] = new FrequencyDistribution(*(characteristics.initial_run[i])); + } + } + + if (characteristics.nb_run) { + nb_run = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + nb_run[i] = new FrequencyDistribution(*(characteristics.nb_run[i])); + } + } + else { + nb_run = NULL; + } + + if (characteristics.nb_occurrence) { + nb_occurrence = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + 
nb_occurrence[i] = new FrequencyDistribution(*(characteristics.nb_occurrence[i])); + } + } + else { + nb_occurrence = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of the sequence characteristics for a categorical variable. + * + * \param[in] characteristics reference on a SequenceCharacteristics object. + */ +/*--------------------------------------------------------------*/ + +void SequenceCharacteristics::copy(const SequenceCharacteristics &characteristics) + +{ + int i; + + + nb_value = characteristics.nb_value; + + index_value = new Curves(*(characteristics.index_value)); + + if (characteristics.explicit_index_value) { + explicit_index_value = new Curves(*(characteristics.explicit_index_value)); + } + else { + explicit_index_value = NULL; + } + + first_occurrence = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + first_occurrence[i] = new FrequencyDistribution(*(characteristics.first_occurrence[i])); + } + + recurrence_time = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + recurrence_time[i] = new FrequencyDistribution(*(characteristics.recurrence_time[i])); + } + + sojourn_time = new FrequencyDistribution*[nb_value]; + + if (characteristics.initial_run) { + initial_run = new FrequencyDistribution*[nb_value]; + } + else { + initial_run = NULL; + } + + final_run = new FrequencyDistribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + sojourn_time[i] = new FrequencyDistribution(*(characteristics.sojourn_time[i])); + if (characteristics.initial_run) { + initial_run[i] = new FrequencyDistribution(*(characteristics.initial_run[i])); + } + final_run[i] = new FrequencyDistribution(*(characteristics.final_run[i])); + } + + if (characteristics.nb_run) { + nb_run = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + nb_run[i] = new FrequencyDistribution(*(characteristics.nb_run[i])); + } + } + else { + nb_run = NULL; + } + + if 
(characteristics.nb_occurrence) { + nb_occurrence = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + nb_occurrence[i] = new FrequencyDistribution(*(characteristics.nb_occurrence[i])); + } + } + else { + nb_occurrence = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of the unaffected sequence characteristics for a categorical variable + * in the case of the reversing of the direction of sequences. + * + * \param[in] characteristics reference on a SequenceCharacteristics object. + */ +/*--------------------------------------------------------------*/ + +void SequenceCharacteristics::reverse(const SequenceCharacteristics &characteristics) + +{ + int i; + + + nb_value = characteristics.nb_value; + + index_value = NULL; + explicit_index_value = NULL; + first_occurrence = NULL; + + recurrence_time = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + recurrence_time[i] = new FrequencyDistribution(*(characteristics.recurrence_time[i])); + } + + if (characteristics.initial_run) { + sojourn_time = new FrequencyDistribution*[nb_value]; + initial_run = new FrequencyDistribution*[nb_value]; + final_run = new FrequencyDistribution*[nb_value]; + + for (i = 0;i < nb_value;i++) { + sojourn_time[i] = new FrequencyDistribution(*(characteristics.sojourn_time[i])); + initial_run[i] = new FrequencyDistribution(*(characteristics.final_run[i])); + final_run[i] = new FrequencyDistribution(*(characteristics.initial_run[i])); + } + } + + else { + sojourn_time = NULL; + initial_run = NULL; + final_run = NULL; + } + + if (characteristics.nb_run) { + nb_run = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + nb_run[i] = new FrequencyDistribution(*(characteristics.nb_run[i])); + } + } + else { + nb_run = NULL; + } + + if (characteristics.nb_occurrence) { + nb_occurrence = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + nb_occurrence[i] = new 
FrequencyDistribution(*(characteristics.nb_occurrence[i])); + } + } + else { + nb_occurrence = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor by copy of the SequenceCharacteristics class. + * + * \param[in] characteristics reference on a SequenceCharacteristics object, + * \param[in] transform type of transform. + */ +/*--------------------------------------------------------------*/ + +SequenceCharacteristics::SequenceCharacteristics(const SequenceCharacteristics &characteristics , + sequence_transformation transform) + +{ + switch (transform) { + case REVERSE : + reverse(characteristics); + break; + default : + copy(characteristics); + break; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the data members of the SequenceCharacteristics class. + */ +/*--------------------------------------------------------------*/ + +void SequenceCharacteristics::remove() + +{ + int i; + + + delete index_value; + delete explicit_index_value; + + if (first_occurrence) { + for (i = 0;i < nb_value;i++) { + delete first_occurrence[i]; + } + delete [] first_occurrence; + } + + if (recurrence_time) { + for (i = 0;i < nb_value;i++) { + delete recurrence_time[i]; + } + delete [] recurrence_time; + } + + if (sojourn_time) { + for (i = 0;i < nb_value;i++) { + delete sojourn_time[i]; + } + delete [] sojourn_time; + } + + if (initial_run) { + for (i = 0;i < nb_value;i++) { + delete initial_run[i]; + } + delete [] initial_run; + } + + if (final_run) { + for (i = 0;i < nb_value;i++) { + delete final_run[i]; + } + delete [] final_run; + } + + if (nb_run) { + for (i = 0;i < nb_value;i++) { + delete nb_run[i]; + } + delete [] nb_run; + } + + if (nb_occurrence) { + for (i = 0;i < nb_value;i++) { + delete nb_occurrence[i]; + } + delete [] nb_occurrence; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the 
SequenceCharacteristics class. + */ +/*--------------------------------------------------------------*/ + +SequenceCharacteristics::~SequenceCharacteristics() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the SequenceCharacteristics class. + * + * \param[in] characteristics reference on a SequenceCharacteristics object. + * + * \return SequenceCharacteristics object. + */ +/*--------------------------------------------------------------*/ + +SequenceCharacteristics& SequenceCharacteristics::operator=(const SequenceCharacteristics &characteristics) + +{ + if (&characteristics != this) { + remove(); + copy(characteristics); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the sojourn time frequency distributions for a categorical variable. + * + * \param[in] max_length maximum sequence length, + * \param[in] initial_run_flag flag on the construction of + * the initial run frequency distributions. + */ +/*--------------------------------------------------------------*/ + +void SequenceCharacteristics::create_sojourn_time_frequency_distribution(int max_length , int initial_run_flag) + +{ + int i; + + + sojourn_time = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + sojourn_time[i] = new FrequencyDistribution(max_length + 1); + } + + if (initial_run_flag) { + initial_run = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + initial_run[i] = new FrequencyDistribution(max_length + 1); + } + } + + final_run = new FrequencyDistribution*[nb_value]; + for (i = 0;i < nb_value;i++) { + final_run[i] = new FrequencyDistribution(max_length + 1); + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a SequenceCharacteristics object. 
+ * + * \param[in,out] os stream, + * \param[in] type variable type, + * \param[in] length_distribution sequence length frequency distribution, + * \param[in] exhaustive flag detail level, + * \param[in] comment_flag flag file. + */ +/*--------------------------------------------------------------*/ + +ostream& SequenceCharacteristics::ascii_print(ostream &os , int type , + const FrequencyDistribution &length_distribution , + bool exhaustive , bool comment_flag) const + +{ + int i; + + + if (exhaustive) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " "; + for (i = 0;i < nb_value;i++) { + os << " | " << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + } + os << " | " << STAT_label[STATL_FREQUENCY] << endl; + + index_value->ascii_print(os , comment_flag); + + if (explicit_index_value) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_INDEX_PARAMETER]; + for (i = 0;i < nb_value;i++) { + os << " | " << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + } + os << " | " << STAT_label[STATL_FREQUENCY] << endl; + + explicit_index_value->ascii_print(os , comment_flag); + } + } + + for (i = 0;i < nb_value;i++) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + first_occurrence[i]->ascii_characteristic_print(os , false , comment_flag); + + if ((first_occurrence[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + first_occurrence[i]->ascii_print(os , comment_flag); + } + } + + for (i = 0;i < nb_value;i++) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + recurrence_time[i]->ascii_characteristic_print(os , false , comment_flag); + + if ((recurrence_time[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + recurrence_time[i]->ascii_print(os , comment_flag); + } + } + + for (i = 0;i < nb_value;i++) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + sojourn_time[i]->ascii_characteristic_print(os , false , comment_flag); + + if ((sojourn_time[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + sojourn_time[i]->ascii_print(os , comment_flag); + } + + if (initial_run) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + initial_run[i]->ascii_characteristic_print(os , false , comment_flag); + + if ((initial_run[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + initial_run[i]->ascii_print(os , comment_flag); + } + } + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + final_run[i]->ascii_characteristic_print(os , false , comment_flag); + + if ((final_run[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + final_run[i]->ascii_print(os , comment_flag); + } + } + + if (nb_run) { + for (i = 0;i < nb_value;i++) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + nb_run[i]->ascii_characteristic_print(os , (length_distribution.variance > 0. ? false : true) , comment_flag); + + if ((nb_run[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + nb_run[i]->ascii_print(os , comment_flag); + } + } + } + + if (nb_occurrence) { + for (i = 0;i < nb_value;i++) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + nb_occurrence[i]->ascii_characteristic_print(os , (length_distribution.variance > 0. ? false : true) , comment_flag); + + if ((nb_occurrence[i]->nb_element > 0) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + nb_occurrence[i]->ascii_print(os , comment_flag); + } + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a SequenceCharacteristics object at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] type variable type, + * \param[in] length_distribution sequence length frequency distribution. + */ +/*--------------------------------------------------------------*/ + +ostream& SequenceCharacteristics::spreadsheet_print(ostream &os , int type , + const FrequencyDistribution &length_distribution) const + +{ + int i; + Curves *smoothed_curves; + + + os << "\n"; + for (i = 0;i < nb_value;i++) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << i; + } + os << "\t" << STAT_label[STATL_FREQUENCY] << endl; + index_value->spreadsheet_print(os); + + smoothed_curves = new Curves(*index_value , SMOOTHING); + + os << "\n" << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES] << endl; + for (i = 0;i < nb_value;i++) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + } + os << "\t" << STAT_label[STATL_FREQUENCY] << endl; + smoothed_curves->spreadsheet_print(os); + + delete smoothed_curves; + + if (explicit_index_value) { + os << "\n" << SEQ_label[SEQL_INDEX_PARAMETER]; + for (i = 0;i < nb_value;i++) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + } + os << "\t" << STAT_label[STATL_FREQUENCY] << endl; + explicit_index_value->spreadsheet_print(os); + + smoothed_curves = new Curves(*explicit_index_value , SMOOTHING); + + os << "\n" << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES] << endl; + os << SEQ_label[SEQL_INDEX_PARAMETER]; + for (i = 0;i < nb_value;i++) { + os << "\t" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + } + os << "\t" << STAT_label[STATL_FREQUENCY] << endl; + smoothed_curves->spreadsheet_print(os); + + delete smoothed_curves; + } + + for (i = 0;i < nb_value;i++) { + os << "\n" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + first_occurrence[i]->spreadsheet_characteristic_print(os); + + if (first_occurrence[i]->nb_element > 0) { + os << "\n\t" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + first_occurrence[i]->spreadsheet_print(os); + } + } + + for (i = 0;i < nb_value;i++) { + os << "\n" << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + recurrence_time[i]->spreadsheet_characteristic_print(os); + + if (recurrence_time[i]->nb_element > 0) { + os << "\n\t" << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + recurrence_time[i]->spreadsheet_print(os); + } + } + + for (i = 0;i < nb_value;i++) { + os << "\n" << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + sojourn_time[i]->spreadsheet_characteristic_print(os); + + if (sojourn_time[i]->nb_element > 0) { + os << "\n\t" << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + sojourn_time[i]->spreadsheet_print(os); + } + + if (initial_run) { + os << "\n" << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + initial_run[i]->spreadsheet_characteristic_print(os); + + if (initial_run[i]->nb_element > 0) { + os << "\n\t" << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + initial_run[i]->spreadsheet_print(os); + } + } + + os << "\n" << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + final_run[i]->spreadsheet_characteristic_print(os); + + if (final_run[i]->nb_element > 0) { + os << "\n\t" << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i << " " + << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + final_run[i]->spreadsheet_print(os); + } + } + + if (nb_run) { + for (i = 0;i < nb_value;i++) { + os << "\n" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + nb_run[i]->spreadsheet_characteristic_print(os , (length_distribution.variance > 0. ? false : true)); + + if (nb_run[i]->nb_element > 0) { + os << "\n\t" << SEQ_label[SEQL_NB_RUN_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + nb_run[i]->spreadsheet_print(os); + } + } + } + + if (nb_occurrence) { + for (i = 0;i < nb_value;i++) { + os << "\n" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + nb_occurrence[i]->spreadsheet_characteristic_print(os , (length_distribution.variance > 0. ? false : true)); + + if (nb_occurrence[i]->nb_element > 0) { + os << "\n\t" << SEQ_label[SEQL_NB_OCCURRENCE_OF] << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] + << " " << i << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + nb_occurrence[i]->spreadsheet_print(os); + } + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a SequenceCharacteristics object using Gnuplot. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] variable variable index, + * \param[in] nb_variable number of variables, + * \param[in] type variable type, + * \param[in] length_distribution sequence length frequency distribution. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool SequenceCharacteristics::plot_print(const char *prefix , const char *title , + int variable , int nb_variable , int type , + const FrequencyDistribution &length_distribution) const + +{ + bool status , start; + int i , j , k; + int index_length , nb_histo , histo_index; + Curves *smoothed_curves; + const FrequencyDistribution **phisto; + ostringstream data_file_name[3]; + + + // writing of data files + + data_file_name[0] << prefix << variable + 1 << 0 << ".dat"; + + index_length = index_value->plot_length_computation(); + + if (index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + smoothed_curves = new Curves(*index_value , SMOOTHING); + } + else { + smoothed_curves = NULL; + } + + status = index_value->plot_print((data_file_name[0].str()).c_str() , + index_length , smoothed_curves); + delete smoothed_curves; + + if (explicit_index_value) { + data_file_name[2] << prefix << variable + 1 << 2 << ".dat"; + status = explicit_index_value->plot_print((data_file_name[2].str()).c_str()); + } + + if (status) { + phisto = new const FrequencyDistribution*[1 + NB_OUTPUT * 6]; + + data_file_name[1] << prefix << variable + 1 << 1 << ".dat"; + + nb_histo = 0; + for (i = 0;i < nb_value;i++) { + if (first_occurrence[i]->nb_element > 0) { + 
phisto[nb_histo++] = first_occurrence[i]; + } + } + + for (i = 0;i < nb_value;i++) { + if (recurrence_time[i]->nb_element > 0) { + phisto[nb_histo++] = recurrence_time[i]; + } + } + + for (i = 0;i < nb_value;i++) { + if (sojourn_time[i]->nb_element > 0) { + phisto[nb_histo++] = sojourn_time[i]; + } + if ((initial_run) && (initial_run[i]->nb_element > 0)) { + phisto[nb_histo++] = initial_run[i]; + } + if (final_run[i]->nb_element > 0) { + phisto[nb_histo++] = final_run[i]; + } + } + + if ((nb_run) && (nb_occurrence)) { + for (i = 0;i < nb_value;i++) { + if ((nb_run[i]->nb_element > 0) && (nb_occurrence[i]->nb_element > 0)) { + phisto[nb_histo++] = nb_run[i]; + phisto[nb_histo++] = nb_occurrence[i]; + } + } + } + + length_distribution.plot_print((data_file_name[1].str()).c_str() , nb_histo , phisto); + + // writing of script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << variable + 1 << 1 << ".plot"; + break; + case 1 : + file_name[0] << prefix << variable + 1 << 1 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << variable + 1 << 1 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n"; + + if (index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + out_file << "set title" << " \""; + if (title) { + out_file << title << " - "; + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + out_file << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES] << "\"\n\n"; + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + j = nb_value + 1; + + out_file << "plot [0:" << index_length - 1 << "] [0:1] "; + for (k = 0;k < nb_value;k++) { + out_file << "\"" << 
label((data_file_name[0].str()).c_str()) << "\" using " + << j++ << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " + << k << "\" with linespoints"; + if (k < nb_value - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + out_file << "set title \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1; + } + out_file << "\"\n\n"; + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [0:" << index_length - 1 << "] [0:1] "; + for (j = 0;j < nb_value;j++) { + out_file << "\"" << label((data_file_name[0].str()).c_str()) << "\" using " + << j + 1 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " + << j << "\" with linespoints"; + if (j < nb_value - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (index_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (explicit_index_value) { + out_file << "set title \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1; + } + out_file << "\"\n\n"; + + out_file << "set xlabel \"" << SEQ_label[SEQL_INDEX] << "\"" << endl; + if (explicit_index_value->index_parameter[explicit_index_value->length - 1] - explicit_index_value->index_parameter[0] < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [" << explicit_index_value->index_parameter[0] << ":" + << explicit_index_value->index_parameter[explicit_index_value->length - 1] << "] [0:1] "; + for (j = 0;j < nb_value;j++) { + out_file << "\"" << label((data_file_name[2].str()).c_str()) << "\" using 1:" + << j + 2 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " + << j << "\" with linespoints"; + if (j < nb_value - 1) { + out_file << ",\\"; + } + out_file << endl; + } + + if (explicit_index_value->index_parameter[explicit_index_value->length - 1] - explicit_index_value->index_parameter[0] < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + out_file << "set xlabel" << endl; + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (length_distribution.nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_distribution.max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_distribution.nb_value - 1 << "] [0:" + << (int)(length_distribution.max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using 1 title \"" + << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (length_distribution.nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_distribution.max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = 2; + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << variable + 1 << 2 << ".plot"; + break; + case 1 : + file_name[0] << prefix << variable + 1 << 2 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << variable + 1 << 2 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << 
"set title"; + if ((title) || (nb_variable > 1)) { + out_file << " \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + + start = true; + for (k = 0;k < nb_value;k++) { + if (first_occurrence[k]->nb_element > 0) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (MAX(1 , first_occurrence[k]->nb_value - 1) < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(first_occurrence[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << MAX(first_occurrence[k]->nb_value - 1 , 1) << "] [0:" + << (int)(first_occurrence[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << k + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (MAX(1 , first_occurrence[k]->nb_value - 1) < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(first_occurrence[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << variable + 1 << 3 << ".plot"; + break; + case 1 : + file_name[0] << prefix << variable + 1 << 3 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << variable + 1 << 3 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (nb_variable > 1)) { + out_file << " \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + + start = true; + for (k = 0;k < nb_value;k++) { + if (recurrence_time[k]->nb_element > 0) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (recurrence_time[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(recurrence_time[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << recurrence_time[k]->nb_value - 1 << "] [0:" + << 
(int)(recurrence_time[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << k << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (recurrence_time[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(recurrence_time[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << variable + 1 << 4 << ".plot"; + break; + case 1 : + file_name[0] << prefix << variable + 1 << 4 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << variable + 1 << 4 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (nb_variable > 1)) { + out_file << " \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + + start = true; + for (k = 0;k < nb_value;k++) { + if (sojourn_time[k]->nb_element > 0) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (sojourn_time[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if 
((int)(sojourn_time[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << sojourn_time[k]->nb_value - 1 << "] [0:" + << (int)(sojourn_time[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << k + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (sojourn_time[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(sojourn_time[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if ((initial_run) && (initial_run[k]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (initial_run[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(initial_run[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << initial_run[k]->nb_value - 1 << "] [0:" + << (int)(initial_run[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << k + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (initial_run[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(initial_run[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if (final_run[k]->nb_element > 0) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (final_run[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(final_run[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << final_run[k]->nb_value - 1 << "] [0:" + << (int)(final_run[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << k + << " " << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (final_run[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(final_run[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + + histo_index = j; + + if ((nb_run) && (nb_occurrence)) { + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << variable + 1 << 5 << ".plot"; + break; + case 1 : + file_name[0] << prefix << variable + 1 << 5 << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << variable + 1 << 5 << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if ((title) || (nb_variable > 1)) { + out_file << " \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << variable + 1; + } + out_file << "\""; + } + out_file << "\n\n"; + + j = histo_index; + + start = true; + for (k = 0;k < nb_value;k++) { + if ((nb_run[k]->nb_element > 0) && (nb_occurrence[k]->nb_element > 0)) { + if (!start) { + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + else { + start = false; + } + + if (nb_run[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(nb_run[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << 
"plot [0:" << nb_run[k]->nb_value - 1 << "] [0:" + << (int)(nb_run[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << k + << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (nb_run[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(nb_run[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (nb_occurrence[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(nb_occurrence[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << nb_occurrence[k]->nb_value - 1 << "] [0:" + << (int)(nb_occurrence[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << k + << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (nb_occurrence[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(nb_occurrence[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (length_distribution.nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_distribution.max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_distribution.nb_value - 1 << "] [0:" + << (int)(length_distribution.max * YSCALE) + 1 << "] \"" + << label((data_file_name[1].str()).c_str()) << "\" using 1 title \"" + << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (length_distribution.nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_distribution.max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + delete [] phisto; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a SequenceCharacteristics object. + * + * \param[in] plot reference on a MultiPlotSet object, + * \param[in] index MultiPlot index, + * \param[in] variable variable index, + * \param[in] type variable type, + * \param[in] length_distribution sequence length frequency distribution. 
+ */ +/*--------------------------------------------------------------*/ + +void SequenceCharacteristics::plotable_write(MultiPlotSet &plot , int &index , + int variable , int type , + const FrequencyDistribution &length_distribution) const + +{ + int i , j , k; + int index_length , nb_histo , max_nb_value , max_frequency; + double shift; + Curves *smoothed_curves; + ostringstream title , legend; + + + index_length = index_value->plot_length_computation(); + + // computation of the number of plots + + /* nb_plot_set = 2; + if (index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + nb_plot_set++; + } + + nb_plot_set++; + for (i = 0;i < nb_value;i++) { + if (first_occurrence[i]->nb_element > 0) { + nb_plot_set++; + } + } + + nb_plot_set++; + for (i = 0;i < nb_value;i++) { + if (recurrence_time[i]->nb_element > 0) { + nb_plot_set++; + } + } + + nb_plot_set++; + for (i = 0;i < nb_value;i++) { + if (sojourn_time[i]->nb_element > 0) { + nb_plot_set++; + } + if ((initial_run) && (initial_run[i]->nb_element > 0)) { + nb_plot_set++; + } + if (final_run[i]->nb_element > 0) { + nb_plot_set++; + } + } + + if ((nb_run) && (nb_occurrence)) { + nb_plot_set += 3; + for (i = 0;i < nb_value;i++) { + if ((nb_run[i]->nb_element > 0) && (nb_occurrence[i]->nb_element > 0)) { + nb_plot_set += 2; + } + } + } */ + + plot.variable_nb_viewpoint[variable] += 4; + if ((nb_run) && (nb_occurrence)) { + plot.variable_nb_viewpoint[variable]++; + } + + if (index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + + // smoothed intensity + + plot.variable[index] = variable; + plot.viewpoint[index] = INTENSITY; + + smoothed_curves = new Curves(*index_value , SMOOTHING); + + title.str(""); + if (plot.nb_variable > 1) { + title << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + title << SEQ_label[SEQL_SMOOTHED_OBSERVED_PROBABILITIES]; + plot[index].title = title.str(); + + plot[index].xrange = Range(0 , index_length - 1); + plot[index].yrange = Range(0. 
, 1.); + + if (index_length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].resize(nb_value); + + for (i = 0;i < nb_value;i++) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + } + + smoothed_curves->plotable_write(plot[index]); + + delete smoothed_curves; + index++; + } + + // intensity + + plot.variable[index] = variable; + plot.viewpoint[index] = INTENSITY; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , index_length - 1); + plot[index].yrange = Range(0. , 1.); + + if (index_length - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + + plot[index].resize(nb_value); + + for (i = 0;i < nb_value;i++) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + } + + index_value->plotable_write(plot[index]); + index++; + + if (explicit_index_value) { + + // intensity as a function of the explicit index parameter + + plot.variable[index] = variable; + plot.viewpoint[index] = INTENSITY; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(explicit_index_value->index_parameter[0] , + explicit_index_value->index_parameter[explicit_index_value->length - 1]); + plot[index].yrange = Range(0. 
, 1.); + + if (explicit_index_value->index_parameter[explicit_index_value->length - 1] - explicit_index_value->index_parameter[0] < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + plot[index].xlabel = SEQ_label[SEQL_INDEX]; + + plot[index].resize(nb_value); + + for (i = 0;i < nb_value;i++) { + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " + << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << i; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "linespoints"; + } + + explicit_index_value->plotable_write(plot[index]); + index++; + } + + // sequence length frequency distribution + + plot.variable[index] = variable; + plot.viewpoint[index] = INTENSITY; + + plot[index].xrange = Range(0 , length_distribution.nb_value - 1); + plot[index].yrange = Range(0 , ceil(length_distribution.max * YSCALE)); + + if (length_distribution.nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(length_distribution.max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + length_distribution.plotable_frequency_write(plot[index][0]); + index++; + + // frequency distributions of the time to the 1st occurrence of a category + + plot.variable[index] = variable; + plot.viewpoint[index] = FIRST_OCCURRENCE; + + title.str(""); + if (plot.nb_variable > 1) { + title << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + title << SEQ_label[SEQL_FIRST_OCCURRENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[index].title = title.str(); + + // computation of the maximum time to the 1st occurrence and the maximum frequency + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + for (i = 0;i < nb_value;i++) { + if (first_occurrence[i]->nb_element > 0) { + nb_histo++; + + if 
(first_occurrence[i]->nb_value > max_nb_value) { + max_nb_value = first_occurrence[i]->nb_value; + } + if (first_occurrence[i]->max > max_frequency) { + max_frequency = first_occurrence[i]->max; + } + } + } + + plot[index].xrange = Range(0 , max_nb_value); + plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(nb_histo); + + i = 0; + shift = 0.; + + for (j = 0;j < nb_value;j++) { + if (first_occurrence[j]->nb_element > 0) { + legend.str(""); + legend << STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "impulses"; + + for (k = first_occurrence[j]->offset;k < first_occurrence[j]->nb_value;k++) { + if (first_occurrence[j]->frequency[k] > 0) { + plot[index][i].add_point(k + shift , first_occurrence[j]->frequency[k]); + } + } + + if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) { + shift += PLOT_SHIFT; + } + else { + shift += PLOT_MAX_SHIFT / (nb_histo - 1); + } + + i++; + } + } + index++; + + for (i = 0;i < nb_value;i++) { + if (first_occurrence[i]->nb_element > 0) { + + // frequency distribution of the time to the 1st occurrence of a category + + plot.variable[index] = variable; + plot.viewpoint[index] = FIRST_OCCURRENCE; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , MAX(first_occurrence[i]->nb_value - 1 , 1)); + plot[index].yrange = Range(0 , ceil(first_occurrence[i]->max * YSCALE)); + + if (MAX(first_occurrence[i]->nb_value - 1 , 1) < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(first_occurrence[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_FIRST_OCCURRENCE_OF] 
<< STAT_label[type == STATE ? STATL_STATE : STATL_VALUE] + << " " << i << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + first_occurrence[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + + // frequency distributions of the recurrence time in a category + + plot.variable[index] = variable; + plot.viewpoint[index] = RECURRENCE_TIME; + + title.str(""); + if (plot.nb_variable > 1) { + title << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + title << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[index].title = title.str(); + + // computation of the maximum recurrence time and the maximum frequency + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + for (i = 0;i < nb_value;i++) { + if (recurrence_time[i]->nb_element > 0) { + nb_histo++; + + if (recurrence_time[i]->nb_value > max_nb_value) { + max_nb_value = recurrence_time[i]->nb_value; + } + if (recurrence_time[i]->max > max_frequency) { + max_frequency = recurrence_time[i]->max; + } + } + } + + plot[index].xrange = Range(0 , max_nb_value); + plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(nb_histo); + + i = 0; + shift = 0.; + + for (j = 0;j < nb_value;j++) { + if (recurrence_time[j]->nb_element > 0) { + legend.str(""); + legend << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "impulses"; + + for (k = recurrence_time[j]->offset;k < recurrence_time[j]->nb_value;k++) { + if (recurrence_time[j]->frequency[k] > 0) { + plot[index][i].add_point(k + shift , recurrence_time[j]->frequency[k]); + } + } + + if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) { + shift += PLOT_SHIFT; + } + else { + shift += PLOT_MAX_SHIFT / (nb_histo - 1); + } + + i++; + } + } + index++; + + for (i = 0;i < nb_value;i++) { + if (recurrence_time[i]->nb_element > 0) { + + // frequency distribution of the recurrence time in a category + + plot.variable[index] = variable; + plot.viewpoint[index] = RECURRENCE_TIME; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , recurrence_time[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(recurrence_time[i]->max * YSCALE)); + + if (recurrence_time[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(recurrence_time[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " + << i << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + recurrence_time[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + + // frequency distributions of the sojourn time in a category + + plot.variable[index] = variable; + plot.viewpoint[index] = SOJOURN_TIME; + + title.str(""); + if (plot.nb_variable > 1) { + title << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + title << STAT_label[STATL_SOJOURN_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[index].title = title.str(); + + // computation of the maximum sojourn time and the maximum frequency + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + for (i = 0;i < nb_value;i++) { + if (sojourn_time[i]->nb_element > 0) { + nb_histo++; + + if (sojourn_time[i]->nb_value > max_nb_value) { + max_nb_value = sojourn_time[i]->nb_value; + } + if (sojourn_time[i]->max > max_frequency) { + max_frequency = sojourn_time[i]->max; + } + } + } + + plot[index].xrange = Range(0 , max_nb_value); + plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(nb_histo); + + i = 0; + shift = 0.; + + for (j = 0;j < nb_value;j++) { + if (sojourn_time[j]->nb_element > 0) { + legend.str(""); + legend << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "impulses"; + + for (k = sojourn_time[j]->offset;k < sojourn_time[j]->nb_value;k++) { + if (sojourn_time[j]->frequency[k] > 0) { + plot[index][i].add_point(k + shift , sojourn_time[j]->frequency[k]); + } + } + + if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) { + shift += PLOT_SHIFT; + } + else { + shift += PLOT_MAX_SHIFT / (nb_histo - 1); + } + + i++; + } + } + index++; + + for (i = 0;i < nb_value;i++) { + if (sojourn_time[i]->nb_element > 0) { + + // frequency distribution of the sojourn time in a category + + plot.variable[index] = variable; + plot.viewpoint[index] = SOJOURN_TIME; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , sojourn_time[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(sojourn_time[i]->max * YSCALE)); + + if (sojourn_time[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(sojourn_time[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " + << i << " " << STAT_label[STATL_SOJOURN_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + sojourn_time[i]->plotable_frequency_write(plot[index][0]); + index++; + } + + if ((initial_run) && (initial_run[i]->nb_element > 0)) { + + // frequency distribution of the sojourn time in the first observed value + + plot.variable[index] = variable; + plot.viewpoint[index] = SOJOURN_TIME; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , initial_run[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(initial_run[i]->max * YSCALE)); + + if (initial_run[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(initial_run[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_INITIAL_RUN] << " - " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " + << i << " " << STAT_label[STATL_SOJOURN_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + initial_run[i]->plotable_frequency_write(plot[index][0]); + index++; + } + + if (final_run[i]->nb_element > 0) { + + // frequency distribution of the sojourn time in the last observed value + + plot.variable[index] = variable; + plot.viewpoint[index] = SOJOURN_TIME; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , final_run[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(final_run[i]->max * YSCALE)); + + if (final_run[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(final_run[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + legend.str(""); + legend << SEQ_label[SEQL_FINAL_RUN] << " - " + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " + << i << " " << STAT_label[STATL_SOJOURN_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + final_run[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + + if ((nb_run) && (nb_occurrence)) { + + // frequency distributions of the number of runs of a category per sequence + + plot.variable[index] = variable; + plot.viewpoint[index] = COUNTING; + + title.str(""); + if (plot.nb_variable > 1) { + title << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + title << SEQ_label[SEQL_NB_RUN] << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[index].title = title.str(); + + // computation of the maximum number of runs and the maximum frequency + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + for (i = 0;i < nb_value;i++) { + if (nb_run[i]->nb_element > 0) { + nb_histo++; + + if (nb_run[i]->nb_value > max_nb_value) { + max_nb_value = nb_run[i]->nb_value; + } + if (nb_run[i]->max > max_frequency) { + max_frequency = nb_run[i]->max; + } + } + } + + plot[index].xrange = Range(0 , max_nb_value); + plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(nb_histo); + + i = 0; + shift = 0.; + + for (j = 0;j < nb_value;j++) { + if (nb_run[j]->nb_element > 0) { + legend.str(""); + legend << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "impulses"; + + for (k = nb_run[j]->offset;k < nb_run[j]->nb_value;k++) { + if (nb_run[j]->frequency[k] > 0) { + plot[index][i].add_point(k + shift , nb_run[j]->frequency[k]); + } + } + + if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) { + shift += PLOT_SHIFT; + } + else { + shift += PLOT_MAX_SHIFT / (nb_histo - 1); + } + + i++; + } + } + index++; + + // frequency distributions of the number of occurrences of a category per sequence + + plot.variable[index] = variable; + plot.viewpoint[index] = COUNTING; + + title.str(""); + if (plot.nb_variable > 1) { + title << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "; + } + title << SEQ_label[SEQL_NB_OCCURRENCE] << " " << SEQ_label[SEQL_PER_SEQUENCE] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[index].title = title.str(); + + // computation of the maximum number of occurrences and the maximum frequency + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + for (i = 0;i < nb_value;i++) { + if (nb_occurrence[i]->nb_element > 0) { + nb_histo++; + + if (nb_occurrence[i]->nb_value > max_nb_value) { + max_nb_value = nb_occurrence[i]->nb_value; + } + if (nb_occurrence[i]->max > max_frequency) { + max_frequency = nb_occurrence[i]->max; + } + } + } + + plot[index].xrange = Range(0 , max_nb_value); + plot[index].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(nb_histo); + + i = 0; + shift = 0.; + + for (j = 0;j < nb_value;j++) { + if (nb_occurrence[j]->nb_element > 0) { + legend.str(""); + legend << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << j; + plot[index][i].legend = legend.str(); + + plot[index][i].style = "impulses"; + + for (k = nb_occurrence[j]->offset;k < nb_occurrence[j]->nb_value;k++) { + if (nb_occurrence[j]->frequency[k] > 0) { + plot[index][i].add_point(k + shift , nb_occurrence[j]->frequency[k]); + } + } + + if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) { + shift += PLOT_SHIFT; + } + else { + shift += PLOT_MAX_SHIFT / (nb_histo - 1); + } + + i++; + } + } + index++; + + for (i = 0;i < nb_value;i++) { + if ((nb_run[i]->nb_element > 0) && (nb_occurrence[i]->nb_element > 0)) { + + // frequency distribution of the number of runs of a category per sequence + + plot.variable[index] = variable; + plot.viewpoint[index] = COUNTING; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , nb_run[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(nb_run[i]->max * YSCALE)); + + if (nb_run[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(nb_run[i]->max * YSCALE) + 1 < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_NB_RUN_OF] + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + nb_run[i]->plotable_frequency_write(plot[index][0]); + index++; + + // frequency distribution of the number of occurrences of a category per sequence + + plot.variable[index] = variable; + plot.viewpoint[index] = COUNTING; + + if (plot.nb_variable > 1) { + title.str(""); + title << STAT_label[STATL_VARIABLE] << " " << variable + 1; + plot[index].title = title.str(); + } + + plot[index].xrange = Range(0 , nb_occurrence[i]->nb_value - 1); + plot[index].yrange = Range(0 , ceil(nb_occurrence[i]->max * YSCALE)); + + if (nb_occurrence[i]->nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(nb_occurrence[i]->max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_NB_OCCURRENCE_OF] + << STAT_label[type == STATE ? 
STATL_STATE : STATL_VALUE] << " " << i << " " + << SEQ_label[SEQL_PER_SEQUENCE] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + nb_occurrence[i]->plotable_frequency_write(plot[index][0]); + index++; + } + } + + // sequence length frequency distribution + + plot.variable[index] = variable; + plot.viewpoint[index] = COUNTING; + + plot[index].xrange = Range(0 , length_distribution.nb_value - 1); + plot[index].yrange = Range(0 , ceil(length_distribution.max * YSCALE)); + + if (length_distribution.nb_value - 1 < TIC_THRESHOLD) { + plot[index].xtics = 1; + } + if (ceil(length_distribution.max * YSCALE) < TIC_THRESHOLD) { + plot[index].ytics = 1; + } + + plot[index].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[index][0].legend = legend.str(); + + plot[index][0].style = "impulses"; + + length_distribution.plotable_frequency_write(plot[index][0]); + index++; + } +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/sequence_label.cpp b/src/cpp/sequence_analysis/sequence_label.cpp new file mode 100644 index 0000000..0f9e230 --- /dev/null +++ b/src/cpp/sequence_analysis/sequence_label.cpp @@ -0,0 +1,423 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: sequence_label.cpp 18667 2015-11-09 12:03:24Z guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by 
the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +namespace sequence_analysis { + + +/**************************************************************** + * + * Key words (file format) + */ + + +const char *SEQ_word[] = { + "MARKOV_CHAIN" , + "EQUILIBRIUM_MARKOV_CHAIN" , + "HIDDEN_MARKOV_CHAIN" , + "EQUILIBRIUM_HIDDEN_MARKOV_CHAIN" , + + "SEMI-MARKOV_CHAIN" , + "EQUILIBRIUM_SEMI-MARKOV_CHAIN" , + "HIDDEN_SEMI-MARKOV_CHAIN" , + "EQUILIBRIUM_HIDDEN_SEMI-MARKOV_CHAIN" , + + "NONHOMOGENEOUS_MARKOV_CHAIN" , + "HOMOGENEOUS" , + "NONHOMOGENEOUS" , + + "OCCUPANCY_DISTRIBUTION" , + + "INDEX_PARAMETER" + +// "TOP_PARAMETERS" , +// "AXILLARY_PROBABILITY" , +// "RHYTHM_RATIO" +}; + + +const char *SEQ_index_parameter_word[] = { + " " , + "TIME" , + "TIME_INTERVAL" , + "POSITION" , + "POSITION_INTERVAL" +}; + + + +/**************************************************************** + * + * Labels + */ + + +const char *SEQ_label[] = { + "log-likelihood for the state sequence" , + "log-likelihood for the state sequences" , + "log-likelihood for the observed sequences" , + "information quantity of the sequences in the i.i.d. 
case" , + + "smoothed" , + "observed" , + "theoretical" , + "smoothed observed probabilities" , + + "ordinary renewal process" , + "equilibrium renewal process" , + "time between 2 observation" , + "inter-event" , + "recurrence time" , + "length-biased" , + "inter-event time censored on both ends" , + "inter-event time censored on one end" , + "complete inter-event time" , + "time up to event" , + "number of event" , + "during" , + "time unit" , + "mixture of number of event distributions" , + "no-event probability" , + "event probability" , + + "Markov chain" , + "hidden Markov chain" , + "semi-Markov chain" , + "hidden semi-Markov chain" , + + "maximum order" , + "memory tree" , + "transition tree" , + "memory transition matrix" , + "non-terminal" , + "terminal" , + "completion" , + "completed" , + "confidence intervals for transition probabilities" , + "free transient parameter" , + "free transient parameters" , + "recommended maximum order" , + "pruning threshold" , + "initial counts" , + "transition counts" , + "maximum transition count difference" , + "log-likelihoods" , + "count" , + "delta" , + "Krichevsky-Trofimov" , + "likelihood ratio test" , + + "self-transition" , + "asymptote" , + + "occupancy distribution" , + + "probability of no-occurrence of " , + "time up to the first occurrence" , + "time up to the first occurrence of " , + "probability of leaving " , + "absorption probability of " , + "biased" , + "occupancy" , + "complete/censored state occupancy weights" , + "initial run" , + "final run" , + "mixture of " , + "number of runs" , + "number of runs of " , + "number of occurrences" , + "number of occurrences of " , + "per sequence" , + "per length" , + "missing value" , + "words" , + + "state probabilities" , + "posterior state sequence probability" , + "posterior state sequence probability log ratio" , + "state begin" , + "posterior most probable initial state probability" , + "posterior state probabilities" , + "posterior in state 
probabilities" , + "posterior out state probabilities" , + "conditional entropy" , + "marginal entropy" , + "sum of marginal entropies" , + "partial state sequence entropy" , + "state sequence entropy" , + "state sequence divergence" , + "upper bound" , + "number of state sequences" , + "maximum posterior state probabilities" , + "maximum posterior in state probabilities" , + "maximum posterior out state probabilities" , + "likelihood ratio" , + + "correlation function" , + "partial" , + "auto" , + "cross-" , + "Pearson" , + "Spearman" , + "Kendall" , + "rank" , + "lag" , + "maximum lag" , + "autoregressive model" , + "white noise" , + "randomness 95% confidence limit" , + "pair frequency" , + + "index" , + + "simulated" , + "sequence" , + "sequences" , + "vertex identifier" , + "index parameter" , + "minimum index parameter" , + "maximum index parameter" , + "time" , + "time interval" , + "position" , + "position interval" , + "length" , + "sequence length" , + "cumulative length" , + "shift" , + + "alignment length" , + "aligned on" , + "maximum gap length" , + "alignment coding" , + "consensus" , + + "optimal" , + "change point" , + "change points" , + "change-point amplitude" , + "segment" , + "segments" , + "segment sample size" , + "global standard deviation" , + "global residual standard deviation" , + "root mean square error" , + "mean absolute error" , + "piecewise linear function" , + "confidence interval" , + "confidence intervals" , + "autoregressive coefficient" , + "number of segments" , + "posterior probability" , + "dimension jump" , + "optimal slope" , + "piecewise step function" , + "number of segmentations" , + "segmentations" , + "segmentation log-likelihood" , + "log-likelihood for all the possible segmentations" , + "change-point uncertainty intervals" , + "posterior change-point probabilities" , + "posterior segment probabilities" , + "segment length" , + "prior segment length" , + "segmentation entropy" , + "first-order dependency entropy" , 
+ "change-point entropy" , + "uniform entropy" , + "segmentation divergence" , + "begin conditional entropy" , + "end conditional entropy" , + "maximum change-point likelihood" , + "maximum segment likelihood" , + "maximum posterior change-point probabilities" , + "maximum posterior segment probabilities" , + "ambiguity" + +// "top" , +// "tops" , +// "number of internode" +}; + + + +/**************************************************************** + * + * Error messages for lexical analysis of files + */ + + +const char *SEQ_parsing[] = { + "time data not ordered" , + "time data too large" , + "number of event data not ordered" , + + "bad state" , + "bad number of memories" , + + "time index not ordered" , + "position not ordered" , + "position not allowed" , + "bad maximum sequence length: should be greater than 1" +}; + + + +/**************************************************************** + * + * Error messages + */ + + +const char *SEQ_error[] = { + "only time interval censored on both ends: choose a longer observation period" , + "incompatible renewal data" , + "maximum number of events too small: choose a longer observation period" , + "average number of events too small: choose a longer observation period" , + "time unit too large" , + + "format error: should be a pair {observation period, number of events}" , + "bad time between two observation dates" , + "bad minimum time" , + "bad maximum time" , + "bad minimum number of events" , + "bad maximum number of events" , + "empty renewal data structure" , + "time between two observation dates too short" , + "time between two observation dates too long" , + + "bad model structure" , + "single state component" , + "bad number of states" , + "missing state" , + "bad order" , + "bad minimum order" , + "bad maximum order" , + "too many parameters" , + "overlap of values observed in the different states" , + "bad model type" , + "bad output process type" , + "no parametric output process" , + "bad minimum number of 
state sequences" , + "bad number of state sequences" , + "average state occupancy too short" , + "bad number of sequences" , + "bad sequence identifier" , + "bad sequence identifiers" , + "bad reference sequence identifier" , + "bad test sequence identifier" , + "bad sequence length" , + "sequence length too short" , + "sequence length too long" , + "cumulative sequence length too long" , + "variable sequence length: should be common to the individuals" , + "states not represented" , + "failure in the computation of the optimal state sequences" , + "reference model" , + "target model" , + "number of failures for the Kullback-Leibler divergence estimation" , + + "vertex identifier not allowed: change the sample order" , + "bad vertex identifier" , + "bad index parameter type" , + "bad index parameter" , + "bad state" , + "bad variable indices" , + "bad variable lag" , + "bad date order" , + "bad begin index parameter" , + "bad end index parameter" , + "bad minimum sequence length" , + "bad maximum sequence length" , + "bad maximum run length" , + "bad minimum index parameter" , + "bad maximum index parameter" , + "bad number of selected values" , + "unequal index intervals: should be equal" , + "bad number of sequences: should be > 1" , + "bad position transform step" , + "bad length" , + "bad value" , + "bad correlation coefficient type" , + "bad frequency" , + "bad maximum lag" , + "incompatible with other correlation functions" , + "bad autoregressive coefficient: should be between -1 and 1" , + "bad differencing order" , + "initial run histograms already built" , + "bad run length" , + "too high number of possible words" , + "bad minimum frequency: should be positive" , + + "state sequences not in the data" , + "characteristics not computed" , + "consecutive values from 0" , + "non-existing characteristic distribution" , + "non-existing forward sojourn time distribution" , + "bad ordering of the posterior probabilities of the most probable state sequence and the 
most probable initial state" , + "incompatible with model" , + "sequence incompatible with model" , + + "too many alignment" , + "bad insertion/deletion factor: should be greater than 0.5" , + "bad transposition factor: should be between 0 and 2" , + + "forbidden output" , + "bad number of segments" , + "bad minimum number of segments" , + "bad maximum number of segments" , + "bad change point" , + "segmentation failure" , + "bad number of segmentations" , + "bad change-point model" + +// "bad position" , +// "bad number of internodes" , +// "bad top identifier" , +// "bad main axe number of internodes: should be greater than the last position" , +// "bad minimum position" , +// "bad maximum position" , +// "bad neighborhood" , +// "not enough neighbors" , +// "equality of growth probabilities not possible" , +// "bad number of tops" , +// "bad number of trials" , +// "bad number of axillary shoots per node" +}; + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/sequence_label.h b/src/cpp/sequence_analysis/sequence_label.h new file mode 100644 index 0000000..eafaefe --- /dev/null +++ b/src/cpp/sequence_analysis/sequence_label.h @@ -0,0 +1,436 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: sequence_label.h 18668 2015-11-09 12:03:42Z guedon $ + * + * Forum for StructureAnalysis developers: amldevlp@cirad.fr + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef SEQUENCE_LABEL_H +#define SEQUENCE_LABEL_H + + + +namespace sequence_analysis { + + +/**************************************************************** + * + * Key word identifiers (file format) + */ + + + enum sequence_analysis_keyword { + SEQW_MARKOV_CHAIN , + SEQW_EQUILIBRIUM_MARKOV_CHAIN , + SEQW_HIDDEN_MARKOV_CHAIN , + SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN , + + SEQW_SEMI_MARKOV_CHAIN , + SEQW_EQUILIBRIUM_SEMI_MARKOV_CHAIN , + SEQW_HIDDEN_SEMI_MARKOV_CHAIN , + SEQW_EQUILIBRIUM_HIDDEN_SEMI_MARKOV_CHAIN , + + SEQW_NONHOMOGENEOUS_MARKOV_CHAIN , + SEQW_HOMOGENEOUS , + SEQW_NONHOMOGENEOUS , + + SEQW_OCCUPANCY_DISTRIBUTION , + + SEQW_INDEX_PARAMETER + +// SEQW_TOP_PARAMETERS , +// SEQW_AXILLARY_PROBABILITY , +// SEQW_RHYTHM_RATIO + }; + + + extern const char *SEQ_word[]; + extern const char *SEQ_index_parameter_word[]; + + + +/**************************************************************** + * + * Label identifiers + */ + + + enum sequence_analysis_label { + SEQL_STATE_SEQUENCE_LIKELIHOOD , + SEQL_STATE_SEQUENCES_LIKELIHOOD , + SEQL_OBSERVED_SEQUENCES_LIKELIHOOD , + SEQL_IID_INFORMATION , + + SEQL_SMOOTHED , + SEQL_OBSERVED , + SEQL_THEORETICAL , + SEQL_SMOOTHED_OBSERVED_PROBABILITIES , + + SEQL_ORDINARY_RENEWAL , + SEQL_EQUILIBRIUM_RENEWAL , + SEQL_OBSERVATION_TIME , + SEQL_INTER_EVENT , + SEQL_RECURRENCE_TIME , + SEQL_LENGTH_BIASED , + SEQL_2_CENSORED_INTER_EVENT , + 
SEQL_1_CENSORED_INTER_EVENT , + SEQL_COMPLETE_INTER_EVENT , + SEQL_TIME_UP , + SEQL_NB_EVENT , + SEQL_DURING , + SEQL_TIME_UNIT , + SEQL_NB_EVENT_MIXTURE , + SEQL_NO_EVENT_PROBABILITY , + SEQL_EVENT_PROBABILITY , + + SEQL_MARKOV_CHAIN , + SEQL_HIDDEN_MARKOV_CHAIN , + SEQL_SEMI_MARKOV_CHAIN , + SEQL_HIDDEN_SEMI_MARKOV_CHAIN , + + SEQL_MAX_ORDER , + SEQL_MEMORY_TREE , + SEQL_TRANSITION_TREE , + SEQL_MEMORY_TRANSITION_MATRIX , + SEQL_NON_TERMINAL , + SEQL_TERMINAL , + SEQL_COMPLETION , + SEQL_COMPLETED , + SEQL_TRANSITION_PROBABILITIY_CONFIDENCE_INTERVAL , + SEQL_FREE_TRANSIENT_PARAMETER , + SEQL_FREE_TRANSIENT_PARAMETERS , + SEQL_RECOMMENDED_MAX_ORDER , + SEQL_PRUNING_THRESHOLD , + SEQL_INITIAL_COUNTS , + SEQL_TRANSITION_COUNTS , + SEQL_MAX_TRANSITION_COUNT_DIFFERENCE , + SEQL_LIKELIHOODS , + SEQL_COUNT , + SEQL_DELTA , + SEQL_KRICHEVSKY_TROFIMOV , + SEQL_LIKELIHOOD_RATIO_TEST , + + SEQL_SELF_TRANSITION , + SEQL_ASYMPTOTE , + + SEQL_OCCUPANCY_DISTRIBUTION , + + SEQL_NO_OCCURRENCE , + SEQL_FIRST_OCCURRENCE , + SEQL_FIRST_OCCURRENCE_OF , + SEQL_LEAVING , + SEQL_ABSORPTION , + SEQL_BIASED , + SEQL_OCCUPANCY , + SEQL_OCCUPANCY_WEIGHTS , + SEQL_INITIAL_RUN , + SEQL_FINAL_RUN , + SEQL_MIXTURE_OF , + SEQL_NB_RUN , + SEQL_NB_RUN_OF , + SEQL_NB_OCCURRENCE , + SEQL_NB_OCCURRENCE_OF , + SEQL_PER_SEQUENCE , + SEQL_PER_LENGTH , + SEQL_MISSING_VALUE , + SEQL_WORDS , + + SEQL_STATE_PROBABILITY , + SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY , + SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY_LOG_RATIO , + SEQL_STATE_BEGIN , + SEQL_POSTERIOR_INITIAL_STATE_PROBABILITY , + SEQL_POSTERIOR_STATE_PROBABILITY , + SEQL_POSTERIOR_IN_STATE_PROBABILITY , + SEQL_POSTERIOR_OUT_STATE_PROBABILITY , + SEQL_CONDITIONAL_ENTROPY , + SEQL_MARGINAL_ENTROPY , + SEQL_MARGINAL_ENTROPY_SUM , + SEQL_PARTIAL_STATE_SEQUENCE_ENTROPY , + SEQL_STATE_SEQUENCE_ENTROPY , + SEQL_STATE_SEQUENCE_DIVERGENCE , + SEQL_UPPER_BOUND , + SEQL_NB_STATE_SEQUENCE , + SEQL_MAX_POSTERIOR_STATE_PROBABILITY , + 
SEQL_MAX_POSTERIOR_IN_STATE_PROBABILITY , + SEQL_MAX_POSTERIOR_OUT_STATE_PROBABILITY , + SEQL_LIKELIHOOD_RATIO , + + SEQL_CORRELATION_FUNCTION , + SEQL_PARTIAL , + SEQL_AUTO , + SEQL_CROSS , + SEQL_PEARSON , + SEQL_SPEARMAN , + SEQL_KENDALL , + SEQL_RANK , + SEQL_LAG , + SEQL_MAX_LAG , + SEQL_AUTOREGRESSIVE_MODEL , + SEQL_WHITE_NOISE , + SEQL_RANDOMNESS_95_CONFIDENCE_LIMIT , + SEQL_PAIR_FREQUENCY , + + SEQL_INDEX , + + SEQL_SIMULATED , + SEQL_SEQUENCE , + SEQL_SEQUENCES , + SEQL_VERTEX_IDENTIFIER , + SEQL_INDEX_PARAMETER , + SEQL_MIN_INDEX_PARAMETER , + SEQL_MAX_INDEX_PARAMETER , + SEQL_TIME , + SEQL_TIME_INTERVAL , + SEQL_POSITION , + SEQL_POSITION_INTERVAL , + SEQL_LENGTH , + SEQL_SEQUENCE_LENGTH , + SEQL_CUMUL_LENGTH , + SEQL_SHIFT , + + SEQL_ALIGNMENT_LENGTH , + SEQL_ALIGNED_ON , + SEQL_MAX_GAP_LENGTH , + SEQL_ALIGNMENT_CODING , + SEQL_CONSENSUS , + + SEQL_OPTIMAL , + SEQL_CHANGE_POINT , + SEQL_CHANGE_POINTS , + SEQL_CHANGE_POINT_AMPLITUDE , + SEQL_SEGMENT , + SEQL_SEGMENTS , + SEQL_SEGMENT_SAMPLE_SIZE , + SEQL_GLOBAL_STANDARD_DEVIATION , + SEQL_GLOBAL_RESIDUAL_STANDARD_DEVIATION , + SEQL_ROOT_MEAN_SQUARE_ERROR , + SEQL_MEAN_ABSOLUTE_ERROR , + SEQL_PIECEWISE_LINEAR_FUNCTION , + SEQL_CONFIDENCE_INTERVAL , + SEQL_CONFIDENCE_INTERVALS , + SEQL_AUTOREGRESSIVE_COEFF , + SEQL_NB_SEGMENT , + SEQL_POSTERIOR_PROBABILITY , + SEQL_DIMENSION_JUMP , + SEQL_OPTIMAL_SLOPE , + SEQL_PIECEWISE_STEP_FUNCTION , + SEQL_NB_SEGMENTATION , + SEQL_SEGMENTATIONS , + SEQL_SEGMENTATION_LIKELIHOOD , + SEQL_POSSIBLE_SEGMENTATION_LIKELIHOOD , + SEQL_CHANGE_POINT_CREDIBILITY_INTERVALS , + SEQL_POSTERIOR_CHANGE_POINT_PROBABILITY , + SEQL_POSTERIOR_SEGMENT_PROBABILITY , + SEQL_SEGMENT_LENGTH , + SEQL_PRIOR_SEGMENT_LENGTH , + SEQL_SEGMENTATION_ENTROPY , + SEQL_FIRST_ORDER_ENTROPY , + SEQL_CHANGE_POINT_ENTROPY , + SEQL_UNIFORM_ENTROPY , + SEQL_SEGMENTATION_DIVERGENCE , + SEQL_BEGIN_CONDITIONAL_ENTROPY , + SEQL_END_CONDITIONAL_ENTROPY , + SEQL_MAX_CHANGE_POINT_LIKELIHOOD , + 
SEQL_MAX_SEGMENT_LIKELIHOOD , + SEQL_MAX_POSTERIOR_CHANGE_POINT_PROBABILITY , + SEQL_MAX_POSTERIOR_SEGMENT_PROBABILITY , + SEQL_AMBIGUITY + +// SEQL_TOP , +// SEQL_TOPS , +// SEQL_NB_INTERNODE +}; + + + extern const char *SEQ_label[]; + + + +/**************************************************************** + * + * Identifiers of error messages for lexical analysis of files + */ + + + enum sequence_analysis_parsing { + SEQP_TIME_ORDER , + SEQP_MAX_TIME , + SEQP_NB_EVENT_ORDER , + + SEQP_STATE , + SEQP_NB_MEMORY , + + SEQP_TIME_INDEX_ORDER , + SEQP_POSITION_ORDER , + SEQP_POSITION , + SEQP_MAX_SEQUENCE_LENGTH + }; + + + extern const char *SEQ_parsing[]; + + + +/**************************************************************** + * + * Identifiers of error messages + */ + + + enum sequence_analysis_error { + SEQR_BOTH_END_CENSORED_INTERVAL , + SEQR_INCOMPATIBLE_RENEWAL_DATA , + SEQR_MAX_NB_EVENT_TOO_SMALL , + SEQR_NB_EVENT_TOO_SMALL , + SEQR_TIME_UNIT , + + SEQR_TIME_NB_EVENT_PAIR , + SEQR_OBSERVATION_TIME , + SEQR_MIN_TIME , + SEQR_MAX_TIME , + SEQR_MIN_NB_EVENT , + SEQR_MAX_NB_EVENT , + SEQR_EMPTY_RENEWAL_DATA , + SEQR_SHORT_OBSERVATION_TIME , + SEQR_LONG_OBSERVATION_TIME , + + SEQR_MODEL_STRUCTURE , + SEQR_SINGLE_STATE_COMPONENT , + SEQR_NB_STATE , + SEQR_MISSING_STATE , + SEQR_ORDER , + SEQR_MIN_ORDER , + SEQR_MAX_ORDER , + SEQR_NB_PARAMETER , + SEQR_OVERLAP , + SEQR_MODEL_TYPE , + SEQR_OUTPUT_PROCESS_TYPE , + SEQR_PARAMETRIC_PROCESS , + SEQR_MIN_NB_STATE_SEQUENCE , + SEQR_NB_STATE_SEQUENCE , + SEQR_OCCUPANCY , + SEQR_NB_SEQUENCE , + SEQR_SEQUENCE_IDENTIFIER , + SEQR_SEQUENCE_IDENTIFIERS , + SEQR_REF_SEQUENCE_IDENTIFIER , + SEQR_TEST_SEQUENCE_IDENTIFIER , + SEQR_SEQUENCE_LENGTH , + SEQR_SHORT_SEQUENCE_LENGTH , + SEQR_LONG_SEQUENCE_LENGTH , + SEQR_CUMUL_SEQUENCE_LENGTH , + SEQR_VARIABLE_SEQUENCE_LENGTH , + SEQR_STATES_NOT_REPRESENTED , + SEQR_STATE_SEQUENCE_COMPUTATION_FAILURE , + SEQR_REFERENCE_MODEL , + SEQR_TARGET_MODEL , + SEQR_DIVERGENCE_NB_FAILURE , + + 
SEQR_SAMPLE_VERTEX_IDENTIFIER , + SEQR_VERTEX_IDENTIFIER , + SEQR_INDEX_PARAMETER_TYPE , + SEQR_INDEX_PARAMETER , + SEQR_STATE , + SEQR_VARIABLE_INDICES , + SEQR_VARIABLE_LAG , + SEQR_DATE_ORDER , + SEQR_BEGIN_INDEX_PARAMETER , + SEQR_END_INDEX_PARAMETER , + SEQR_MIN_SEQUENCE_LENGTH , + SEQR_MAX_SEQUENCE_LENGTH , + SEQR_MAX_RUN_LENGTH , + SEQR_MIN_INDEX_PARAMETER , + SEQR_MAX_INDEX_PARAMETER , + SEQR_NB_SELECTED_VALUE , + SEQR_UNEQUAL_INDEX_INTERVALS , + SEQR_SINGLE_SEQUENCE , + SEQR_POSITION_STEP , + SEQR_LENGTH , + SEQR_VALUE , + SEQR_CORRELATION_COEFF_TYPE , + SEQR_FREQUENCY , + SEQR_MAX_LAG , + SEQR_INCOMPATIBLE_CORRELATION_FUNCTION , + SEQR_AUTOREGRESSIVE_COEFF , + SEQR_DIFFERENCING_ORDER , + SEQR_INITIAL_RUN_ALREADY_BUILT , + SEQR_RUN_LENGTH , + SEQR_MAX_NB_WORD , + SEQR_MIN_FREQUENCY , + + SEQR_STATE_SEQUENCES , + SEQR_CHARACTERISTICS_NOT_COMPUTED , + SEQR_CONSECUTIVE_VALUES , + SEQR_NON_EXISTING_CHARACTERISTIC_DISTRIBUTION , + SEQR_NON_EXISTING_FORWARD_DISTRIBUTION , + SEQR_POSTERIOR_PROBABILITY_ORDER , + SEQR_INCOMPATIBLE_MODEL , + SEQR_SEQUENCE_INCOMPATIBLE_MODEL , + + SEQR_NB_ALIGNMENT , + SEQR_INDEL_FACTOR , + SEQR_TRANSPOSITION_FACTOR , + + SEQR_FORBIDDEN_OUTPUT , + SEQR_NB_SEGMENT , + SEQR_MIN_NB_SEGMENT , + SEQR_MAX_NB_SEGMENT , + SEQR_CHANGE_POINT , + SEQR_SEGMENTATION_FAILURE , + SEQR_NB_SEGMENTATION , + SEQR_CHANGE_POINT_MODEL + +// SEQR_POSITION , +// SEQR_NB_INTERNODE , +// SEQR_TOP_IDENTIFIER , +// SEQR_MAIN_AXE_NB_INTERNODE , +// SEQR_MIN_POSITION , +// SEQR_MAX_POSITION , +// SEQR_NEIGHBORHOOD , +// SEQR_NEIGHBORS , +// SEQR_EQUAL_PROBABILITY , +// SEQR_NB_TOP , +// SEQR_NB_TRIAL , +// SEQR_NB_AXILLARY_SHOOT + }; + + + extern const char *SEQ_error[]; + + +}; // namespace sequence_analysis + + + +#endif diff --git a/src/cpp/sequence_analysis/sequences.h b/src/cpp/sequence_analysis/sequences.h new file mode 100644 index 0000000..b7b1619 --- /dev/null +++ b/src/cpp/sequence_analysis/sequences.h @@ -0,0 +1,1448 @@ +/* -*-c++-*- + * 
---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: sequences.h 18659 2015-11-03 07:28:02Z guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef SEQUENCES_H +#define SEQUENCES_H + + +#include "stat_tool/curves.h" +#include "stat_tool/distribution.h" +#include "stat_tool/markovian.h" +// #include "stat_tool/vectors.h" +#include "stat_tool/regression.h" + + +namespace sequence_analysis { + + + +/**************************************************************** + * + * Constants + */ + + + const int DEFAULT_LENGTH = 20; // default sequence length + + const int SEQUENCE_NB_VARIABLE = 30; // maximum number of variables + + const int PLOT_NB_SEQUENCE = 200; // maximum number of plotted sequences (Gnuplot output) + const int PLOT_LEGEND_NB_SEQUENCE = 15; // maximum number of labeled sequences (Gnuplot output) + const double GROWTH_FACTOR = 1.; // growth factor for computing the first relative growth rate + + enum index_parameter_type { + IMPLICIT_TYPE , // implicit index parameter + TIME , // time + TIME_INTERVAL , // time interval + POSITION , // position + POSITION_INTERVAL // between-position interval + }; + + enum sequence_pattern { + LENGTH_PATTERN , + SEQUENCE_CUMUL , + SEQUENCE_MEAN , + FIRST_OCCURRENCE_PATTERN , + SOJOURN_TIME_PATTERN , + NB_RUN_PATTERN , + NB_OCCURRENCE_PATTERN + }; + + enum run_position { + BEGIN_RUN , + END_RUN , + }; + + enum correlation_variable_type { + OBSERVED_VALUE , + OBSERVED_STATE , + THEORETICAL_STATE , + OBSERVED_OUTPUT , + THEORETICAL_OUTPUT + }; + + enum autocorrelation_function_type { + AUTOREGRESSIVE , + WHITE_NOISE , + VOID + }; + + enum sequence_transformation { + SEQUENCE_COPY , + REVERSE , + ADD_STATE_VARIABLE , + EXPLICIT_INDEX_PARAMETER , + REMOVE_INDEX_PARAMETER + }; + + enum categorical_sequence_process_transformation { + CATEGORICAL_SEQUENCE_PROCESS_COPY , + INIT_OCCUPANCY + }; + + enum initial_run { + UNCHANGED , + ADD_INITIAL_RUN , + REMOVE_INITIAL_RUN + }; + + enum memory_tree_selection { + CTM_BIC , // Context Tree Maximizing/BIC algorithm + CTM_KT , // 
Context Tree Maximizing/Krichevsky-Trofimov algorithm + LOCAL_BIC , // recursive pruning/BIC algorithm + CONTEXT // Context algorithm + }; + + enum transition_estimator { + MAXIMUM_LIKELIHOOD , + LAPLACE , + ADAPTATIVE_LAPLACE , + UNIFORM_SUBSET , + UNIFORM_CARDINALITY + }; + + enum segment_model { + CATEGORICAL_CHANGE , + POISSON_CHANGE , + NEGATIVE_BINOMIAL_0_CHANGE , + NEGATIVE_BINOMIAL_1_CHANGE , + ORDINAL_GAUSSIAN_CHANGE , + GAUSSIAN_CHANGE , + MEAN_CHANGE , + VARIANCE_CHANGE , + LINEAR_MODEL_CHANGE , + INTERCEPT_SLOPE_CHANGE , + AUTOREGRESSIVE_MODEL_CHANGE , + STATIONARY_AUTOREGRESSIVE_MODEL_CHANGE , + BAYESIAN_POISSON_CHANGE , + BAYESIAN_GAUSSIAN_CHANGE + }; + + const double PRIOR_VARIANCE_FACTOR = 100.; // factor for deducing the variance of + // the gamma prior distribution + const double PRIOR_SAMPLE_SIZE = 1.; // hyperparameter of the Gaussian-gamma prior distribution + const double PRIOR_DEGREES_OF_FREEDOM = 2.; // hyperparameter of the Gaussian-gamma prior distribution + const double PRIOR_DISPERSION_FACTOR = 10.; // factor for deducing the sum of squared deviations + // of the prior distribution + const int PRIOR_PRECISION = 2; // hyperparameter precision + + const double MAX_NB_WORD = 1.e7; // maximum number of words + + const double STATIONARY_PROBABILITY_THRESHOLD = 1.e-8; // threshold for the computation of the stationary distribution + // of an equilibrium model + const int STATIONARY_PROBABILITY_LENGTH = 10000; // maximum length for the computation of the stationary distribution + // of an equilibrium model + const double LEAVE_INCREMENT = 1.e-6; // threshold for stopping the computation of the probability + // of leaving definitively a state/observed category + + const double CTM_BIC_THRESHOLD = 6.; // threshold for memory pruning + const double CTM_KT_THRESHOLD = 12.; // threshold for memory pruning + const double LOCAL_BIC_THRESHOLD = 10.; // threshold for memory pruning + const double CONTEXT_THRESHOLD = 5.; // threshold for memory pruning + 
+ const int MIN_NB_STATE_SEQUENCE = 1; // number of state sequences, 1st iteration of the MCEM algorithm + const int MAX_NB_STATE_SEQUENCE = 10; // maximum number of state sequences (MCEM algorithm) + const double NB_STATE_SEQUENCE_PARAMETER = 1.; // parameter for the number of state sequences (MCEM algorithm) + + const double OCCUPANCY_THRESHOLD = 0.99999; // threshold on the cumulative distribution function for determining + // the upper bound of the support of a state occupancy distribution + const double OCCUPANCY_MEAN = 10.; // mean state occupancy + + const int POSTERIOR_PROBABILITY_NB_SEQUENCE = 300; // maximum number of sequences for the output of + // the posterior probabilities of the most probable state sequences + + const int NB_STATE_SEQUENCE = 10; // number of computed state sequences + + const int COUNTING_FREQUENCY_MAX_LENGTH = 10000; // maximum sequence length for the extraction of the counting frequency distributions + const int COUNTING_MAX_LENGTH = 500; // maximum sequence length for the computation of the counting distributions + + const int NB_SEQUENCE = 100000; // maximum number of generated sequences + const int MAX_LENGTH = 1000000; // maximum generated sequence length + const int CUMUL_LENGTH = 1000000; // maximum cumulative generated sequence length + + const double RESIDUAL_STANDARD_DEVIATION_COEFF = 1.e-6; // threshold for the estimation of the residual standard deviation + + enum sequence_type { + SEQUENCE , + SEQUENCE_SAMPLE , + TREND , + SUBTRACTION_RESIDUAL , + ABSOLUTE_RESIDUAL , + DIVISION_RESIDUAL , + STANDARDIZED_RESIDUAL , + SEGMENTATION_ENTROPY , + SEGMENTATION_DIVERGENCE , + LOG_LIKELIHOOD_SLOPE + }; + + enum output_sequence_format { + LINE , + COLUMN , + VECTOR , + ARRAY , + POSTERIOR_PROBABILITY + }; + + enum insertion_deletion_cost { + ADAPTATIVE , + FIXED + }; + + enum edit_operation { + DELETION , + BEGIN_END_DELETION , + INSERTION , + BEGIN_END_INSERTION , + MATCH , + SUBSTITUTION , + TRANSPOSITION + }; + + enum 
multiple_alignment { + DATA , + GAP , + BEGIN_END_GAP + }; + + const int NB_ALIGNMENT = 1000000; // maximum number of alignments + const int DISPLAY_NB_ALIGNMENT = 30; // maximum number of alignments for the detailed displayed output + const int FILE_NB_ALIGNMENT = 300; // maximum number of alignments for the detailed file output + + const double INDEL_FACTOR_1 = 0.51; // factor for deducing the insertion/deletion cost - simple alignment + const double INDEL_FACTOR_N = 0.51; // factor for deducing the insertion/deletion cost - multiple alignment + const double TRANSPOSITION_FACTOR = 0.; // factor for deducing the transposition cost + const double INDEL_DISTANCE = 1.0; // insertion/deletion cost + + enum change_point_profile { + CHANGE_POINT , + SEGMENT + }; + + const double CHANGE_POINT_CREDIBILITY_PROBABILITY = 0.05; // probability for defining change-point credibility intervals + const int SEQUENCE_MAX_NB_COLUMN = 20; // threshold for the writing of sequences in text files + const double ROUNDOFF_ERROR = 1.e-10; // error on a sum of doubles + const int PENALTY_SHAPE_SCALING_FACTOR = 100; // scaling factor for the plot of log-likelihoods + const int NB_SEGMENTATION = 10; // number of computed segmentations + const int SLOPE_NB_SEGMENT_RANGE = 5; // minimum number of points for computing the log-likelihood slope + const int DIMENSION_JUMP_NB_SEGMENT = SLOPE_NB_SEGMENT_RANGE + 2; // minimum number of segments for the dimension jump method + const double SLOPE_STEP = 0.01; // slope step for the dimension jump method + const double MAX_SLOPE = 100.; // maximum slope for the dimension jump method + const int MIN_DIMENSION_JUMP = 2; // minimum dimension jump + const double MIN_RANK_SQUARE_SUM = 1.e-2; // default value in the case of a unique rank in a segment + + enum correlation_normalization { + APPROXIMATED , + EXACT + }; + + const double CORRELATION_FREQUENCY_RATIO = 0.1; // frequency ratio for stopping the computation of + // the correlation function + 
const double AUTOCORRELATION_FREQUENCY_RATIO = 0.2; // frequency ratio for stopping the computation of + // the state autocorrelation function + const int AUTOCORRELATION_MIN_FREQUENCY = 20; // minimum frequency for stopping the computation of + // the state autocorrelation function + + const int MAX_DIFFERENCING_ORDER = 3; // maximum differencing order + const int POINTWISE_AVERAGE_NB_SEQUENCE = 250; // maximum number of written sequences for the file output + const int ABSORBING_RUN_LENGTH = 5; // default length of the final absorbing run + const int MAX_ABSORBING_RUN_LENGTH = 20; // maximum length of the final absorbing run + const double ABSORBING_RUN_STANDARD_DEVIATION_FACTOR = 10.; // factor for defining the standard deviation of + // true final absorbing runs + + + +/**************************************************************** + * + * Class definition + */ + + + class VariableOrderMarkovChain; + class VariableOrderMarkov; + class VariableOrderMarkovData; + class VariableOrderMarkovIterator; + class HiddenVariableOrderMarkov; + class SemiMarkov; + class SemiMarkovData; + class SemiMarkovIterator; + class HiddenSemiMarkov; + class NonhomogeneousMarkov; + class NonhomogeneousMarkovData; + class Sequences; + class SequenceCharacteristics; + + class Switching_sequence; // addition of Florence Chaubert + + /// \brief Categorical observation process for sequences + + class CategoricalSequenceProcess : public stat_tool::CategoricalProcess { + + public : + + stat_tool::Distribution *length; ///< sequence length distribution + stat_tool::Curves *index_value; ///< probabilities of each category as a function of the index parameter + double *no_occurrence; ///< probabilities of not observing each category + stat_tool::Distribution **first_occurrence; ///< time to the 1st occurrence distributions + double *leave; ///< probabilities of definitively leaving each category + stat_tool::Distribution **recurrence_time; ///< recurrence time distributions + double 
*absorption; ///< absorbing probabilities + stat_tool::DiscreteParametric **sojourn_time; ///< sojourn time distributions + stat_tool::Distribution **nb_run; ///< number of runs per sequence distributions + stat_tool::Distribution **nb_occurrence; ///< number of occurrences per sequence distributions + + void create_characteristic(const stat_tool::Distribution &ilength , bool* homogeneity , + bool counting_flag = true); + void create_characteristic(const stat_tool::Distribution &ilength , bool sojourn_time_flag = true , + bool counting_flag = true); + void copy(const CategoricalSequenceProcess &process , bool characteristic_flag = true); + void init_occupancy(const CategoricalSequenceProcess &process , int occupancy_nb_value); + void remove(); + + CategoricalSequenceProcess(int inb_state = 0 , int inb_value = 0 , + int observation_flag = false); + CategoricalSequenceProcess(int inb_state , stat_tool::DiscreteParametric **occupancy); + CategoricalSequenceProcess(const stat_tool::CategoricalProcess &process); + CategoricalSequenceProcess(const CategoricalSequenceProcess &process , + categorical_sequence_process_transformation transform = CATEGORICAL_SEQUENCE_PROCESS_COPY , + int param = true); + ~CategoricalSequenceProcess(); + CategoricalSequenceProcess& operator=(const CategoricalSequenceProcess &process); + + stat_tool::Distribution* weight_computation() const; + + static bool test_hidden(int nb_output_process , CategoricalSequenceProcess **process); + + static CategoricalSequenceProcess* occupancy_parsing(stat_tool::StatError &error , ifstream &in_file , + int &line , const stat_tool::Chain &chain , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + + std::ostream& ascii_print(std::ostream &os , int process , + stat_tool::FrequencyDistribution **empirical_observation , + stat_tool::FrequencyDistribution *marginal_distribution , + const SequenceCharacteristics *characteristics , bool exhaustive , + bool file_flag , stat_tool::Forward **forward = NULL) 
const; + std::ostream& spreadsheet_print(std::ostream &os , int process , + stat_tool::FrequencyDistribution **empirical_observation = NULL , + stat_tool::FrequencyDistribution *marginal_distribution = NULL , + const SequenceCharacteristics *characteristics = NULL , + stat_tool::Forward **forward = NULL) const; + bool plot_print(const char *prefix , const char *title , int process , + stat_tool::FrequencyDistribution **empirical_observation = NULL , + stat_tool::FrequencyDistribution *marginal_distribution = NULL , + const SequenceCharacteristics *characteristics = NULL , + const stat_tool::FrequencyDistribution *length_distribution = NULL , + stat_tool::Forward **forward = NULL) const; + void plotable_write(stat_tool::MultiPlotSet &plot , int &index , int process , + stat_tool::FrequencyDistribution **empirical_observation = NULL , + stat_tool::FrequencyDistribution *marginal_distribution = NULL , + const SequenceCharacteristics *characteristics = NULL , + const stat_tool::FrequencyDistribution *length_distribution = NULL , + stat_tool::Forward **forward = NULL) const; + }; + + + /// \brief Correlation functions + + class Correlation : public stat_tool::StatInterface , public stat_tool::Curves { + + friend class VariableOrderMarkovChain; + friend class VariableOrderMarkov; + friend class Sequences; + + friend std::ostream& operator<<(std::ostream &os , const Correlation &correlation) + { return correlation.ascii_write(os); } + + private : + + stat_tool::correlation_type type; ///< correlation coefficient type (PEARSON/SPEARMAN/KENDALL) + correlation_variable_type *variable_type; ///< variable types (OBSERVED_VALUE/OBSERVED_STATE/THEORETICAL_STATE/OBSERVED_OUTPUT/THEORETICAL_OUTPUT) + int *variable1; ///< 1st variables + int *variable2; ///< 2nd variables + autocorrelation_function_type function_type; ///< theoretical correlation function type (AUTOREGRESSIVE/WHITE_NOISE/VOID) + double *theoretical_function; ///< theoretical correlation function for a first-order autoregressive model or a filtered white noise + + void 
copy(const Correlation &correl); + void remove(); + bool plot_print(const char *path , double *confidence_limit) const; + + public : + + Correlation(); + Correlation(stat_tool::correlation_type itype , int max_lag , int ivariable1 , int ivariable2); + Correlation(int inb_curve , int ilength , bool frequency_flag , stat_tool::correlation_type itype); + Correlation(const Correlation &correl) + :stat_tool::Curves(correl) { copy(correl); } + ~Correlation(); + Correlation& operator=(const Correlation &correl); + + Correlation* merge(stat_tool::StatError &error , int nb_correl , const Correlation **icorrel) const; + + std::ostream& line_write(std::ostream &os) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + bool autoregressive_model_autocorrelation(stat_tool::StatError &error , double autoregressive_coeff); + bool white_noise_correlation(stat_tool::StatError &error , int nb_point , double *filter , + int residual = true); + bool white_noise_correlation(stat_tool::StatError &error , const stat_tool::Distribution &dist); + bool white_noise_correlation(stat_tool::StatError &error , int order); + + // class member access + + stat_tool::correlation_type get_type() const { return type; } + correlation_variable_type get_variable_type(int index) const { return variable_type[index]; } + int get_variable1(int index) const { return variable1[index]; } + int get_variable2(int index) const { return variable2[index]; } + double get_theoretical_function(int lag) const { return theoretical_function[lag]; } + }; + + + class MarkovianSequences; + class TimeEvents; + class RenewalData; + + /*! 
\brief Sequences + * + * Sequences implements a collection of multivariate sequences. + * Variables can be either integers, doubles or indices (int). + * Integer and double variables are stored in int_sequence and real_sequence, + * respectively. Both have the same numbers of variables; if type[i] == INT_VALUE, + * real_sequence[s][i] == NULL for all sequences s, and vice versa if + * type[i] == REAL_VALUE. + * real_sequence[s][i][p] represents the value for sequence s for variable i + * (assumed to be REAL_VALUE) at position p + * index_parameter is the sequence index, if explicit, it has to be non-decreasing. + * */ + + class Sequences : public stat_tool::StatInterface { + + friend std::ostream& operator<<(std::ostream &os , const Sequences &seq) + { return seq.ascii_write(os); } + + protected : + + int nb_sequence; ///< number of sequences + int *identifier; ///< sequence identifiers + int max_length; ///< maximum sequence length + int cumul_length; ///< cumulative sequence length + int *length; ///< sequence lengths + stat_tool::FrequencyDistribution *length_distribution; ///< sequence length frequency distribution + int **vertex_identifier; ///< identifiers of vertices of an associated MTG + index_parameter_type index_param_type; ///< index parameter type (TIME/POSITION) + stat_tool::FrequencyDistribution *index_parameter_distribution; ///< explicit index parameter frequency distribution + stat_tool::FrequencyDistribution *index_interval; ///< frequency distribution of the intervals between explicit index parameters + int **index_parameter; ///< explicit index parameters + int nb_variable; ///< number of variables + stat_tool::variable_nature *type; ///< variable types (INT_VALUE/REAL_VALUE/STATE) + double *min_value; ///< minimum value for each variable + double *max_value; ///< maximum value for each variable + stat_tool::FrequencyDistribution **marginal_distribution; ///< marginal frequency distributions + stat_tool::Histogram **marginal_histogram; ///< 
marginal histograms + int ***int_sequence; ///< sequences, integer-valued variables + double ***real_sequence; ///< sequences, real-valued variables + + void init(int inb_sequence , int *iidentifier , int *ilength , int **ivertex_identifier , + index_parameter_type iindex_param_type , int inb_variable , stat_tool::variable_nature *itype , + bool vertex_identifier_copy , bool init_flag); + void init(int inb_sequence , int *iidentifier , int *ilength , int inb_variable , + bool init_flag); + void copy(const Sequences &seq); + void reverse(const Sequences &seq); + void add_state_variable(const Sequences &seq); + void explicit_index_parameter(const Sequences &seq); + void remove_index_parameter(const Sequences &seq); + void remove(); + + bool increasing_index_parameter_checking(stat_tool::StatError &error , bool strict , + const char *pattern_label) const; + bool increasing_sequence_checking(stat_tool::StatError &error , int variable , bool strict , + const char *pattern_label , const char *variable_label) const; + + void cluster(const Sequences &seq , int variable , int step , stat_tool::rounding mode); + void transcode(const Sequences &seq , int ivariable , int min_category , int max_category , + int *category , bool add_variable = false); + void cluster(const Sequences &seq , int variable , int nb_class , double *limit); + void select_variable(const Sequences &seq , int *variable); + + bool pointwise_average_ascii_print(stat_tool::StatError &error , const std::string path , int *frequency , + bool dispersion , sequence_type output) const; + bool pointwise_average_spreadsheet_print(stat_tool::StatError &error , const std::string path , int *frequency , + bool dispersion , sequence_type output) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive , bool comment_flag) const; + std::ostream& ascii_print(std::ostream &os , output_sequence_format format , bool comment_flag , + double *posterior_probability = NULL , double *entropy = NULL , + double 
*nb_state_sequence = NULL , double *posterior_state_probability = NULL , + int line_nb_character = stat_tool::LINE_NB_CHARACTER) const; + bool plot_print(const char *path , int ilength) const; + + void max_length_computation(); + void cumul_length_computation(); + void build_length_frequency_distribution(); + + void index_parameter_computation(); + void build_index_parameter_frequency_distribution(); + void index_interval_computation(); + + void min_value_computation(int variable); + void max_value_computation(int variable); + void build_marginal_frequency_distribution(int variable); + void build_marginal_histogram(int variable , double bin_width = stat_tool::D_DEFAULT , + double imin_value = stat_tool::D_INF); + + std::ostream& alignment_ascii_print(std::ostream &os , int width , int ref_index , int test_index , + const Sequences &alignment , int alignment_index) const; + std::ostream& alignment_spreadsheet_print(std::ostream &os , int ref_index , int test_index , + const Sequences &alignment , int alignment_index) const; + + double indel_distance_computation(const stat_tool::VectorDistance &vector_dist , + double **rank , double **max_category_distance) const; + double indel_distance_computation(const stat_tool::VectorDistance &vector_dist , + int index , int position , double **rank , + double **max_category_distance) const; + double substitution_distance_computation(const stat_tool::VectorDistance &vector_dist , int ref_index , + int test_index , int ref_position , int test_position , + double **rank , const Sequences *test_seq = NULL) const; + double substitution_distance_computation(int ref_index , int test_index , int ref_position, + int test_position , double substitution_distance) const; + + std::ostream& multiple_alignment_ascii_print(std::ostream &os) const; + bool multiple_alignment_ascii_print(stat_tool::StatError &error , const std::string path) const; + + Sequences* multiple_alignment(const Sequences &test_seq , const stat_tool::VectorDistance 
&vector_dist , + double **rank , double **max_category_distance , bool begin_free , + bool end_free , insertion_deletion_cost indel_cost , double indel_factor) const; + + void correlation_computation(Correlation &correl , int variable1 , int variable2 , + correlation_normalization normalization , bool individual_mean = false) const; + + std::ostream& profile_ascii_print(std::ostream &os , int index , int nb_segment , + double **profiles , const char *label , + double **piecewise_function = NULL , long double **change_point = NULL , + stat_tool::Distribution **segment_length = NULL , + stat_tool::Distribution *prior_segment_length = NULL , + long double **begin_conditonal_entropy = NULL , + long double **end_conditional_entropy = NULL , + long double **change_point_entropy = NULL) const; + std::ostream& profile_spreadsheet_print(std::ostream &os , int index , int nb_segment , + double **profiles , const char *label , + bool common_contrast = true , double ***piecewise_function = NULL , + long double **change_point = NULL , + stat_tool::Distribution **segment_length = NULL , + stat_tool::Distribution *prior_segment_length = NULL , + long double **begin_conditonal_entropy = NULL , + long double **end_conditional_entropy = NULL , + long double **change_point_entropy = NULL) const; + std::ostream& profile_plot_print(std::ostream &os , int index , int nb_segment , + double **profiles , bool common_contrast = true , + double ***piecewise_function = NULL , + long double **change_point = NULL , + stat_tool::Distribution **segment_length = NULL , + stat_tool::Distribution *prior_segment_length = NULL , + long double **begin_conditonal_entropy = NULL , + long double **end_conditional_entropy = NULL , + long double **change_point_entropy = NULL) const; + void change_point_profile_plotable_write(stat_tool::MultiPlot &plot , int index , int nb_segment , + long double **change_point) const; + void entropy_profile_plotable_write(stat_tool::MultiPlot &plot , int index , + long 
double *begin_conditional_entropy , + long double *end_conditional_entropy , + long double *change_point_entropy) const; + + void gamma_hyperparameter_computation(int index , int variable , + double *hyperparam) const; + void gaussian_gamma_hyperparameter_computation(int index , int variable , + double *hyperparam) const; + int nb_parameter_computation(int index , int nb_segment , segment_model *model_type , + bool common_contrast) const; + double one_segment_likelihood(int index , segment_model *model_type , bool common_contrast , + double *shape_parameter , double **rank) const; + double piecewise_linear_function(int index , int variable , int nb_segment , segment_model model_type , + bool common_contrast , int *change_point , int *seq_index_parameter , + double **piecewise_function , double **imean = NULL , double **variance = NULL , + double *global_variance = NULL , double **iintercept = NULL , double **islope = NULL , + double **iautoregressive_coeff = NULL , double **correlation = NULL , + double **slope_standard_deviation = NULL , double **iindex_parameter_mean = NULL , + long double **iindex_parameter_variance = NULL , double **determination_coeff = NULL) const; + std::ostream& piecewise_linear_function_ascii_print(std::ostream &os , int index , int variable , int nb_segment , + segment_model model_type , bool common_contrast , int *change_point , + int *seq_index_parameter , double **mean , double **variance , + double **intercept , double **slope , double **autoregressive_coeff , + double **correlation = NULL , double **slope_standard_deviation = NULL , + double **index_parameter_mean = NULL , long double **index_parameter_variance = NULL , + double **determination_coeff = NULL) const; + std::ostream& piecewise_linear_function_spreadsheet_print(std::ostream &os , int index , int variable , int nb_segment , + segment_model model_type , bool common_contrast , int *change_point , + int *seq_index_parameter , double **mean , double **variance , + double 
**intercept , double **slope , double **autoregressive_coeff , + double **correlation = NULL , double **slope_standard_deviation = NULL , + double **index_parameter_mean = NULL , long double **index_parameter_variance = NULL , + double **determination_coeff = NULL) const; + double continuous_piecewise_linear_function(std::ostream &os , int index , int variable , int nb_segment , + segment_model model_type , bool common_contrast , + int *change_point , int *seq_index_parameter , + double *intercept , double *slope , + double *corrected_intercept , double *corrected_slope) const; + Sequences* segmentation_output(int nb_segment , segment_model *model_type , bool common_contrast , + std::ostream *os , sequence_type output = SEQUENCE , int *ichange_point = NULL , + bool continuity = false); + void forward_contrast(int time , int index , segment_model *model_type , bool common_contrast , + double ***factorial , double *shape_parameter , double ***binomial_coeff , + double **seq_mean , int *seq_index_parameter , double **hyperparam , + double **rank , long double *contrast , int nb_segment = 0) const; + void backward_contrast(int time , int index , segment_model *model_type , bool common_contrast , + double ***factorial , double *shape_parameter , double ***binomial_coeff , + double **seq_mean , int *seq_index_parameter , double **hyperparam , + double **rank , long double *contrast) const; + double segmentation(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , double **rank , + double *isegmentation_likelihood = NULL , int *nb_parameter = NULL , + double *segment_penalty = NULL); + double prior_segment_length_inf_bound_computation(int index , int nb_segment , segment_model *model_type , + bool common_contrast) const; + double nb_segmentation_computation(int index , int nb_segment , segment_model *model_type , + bool common_contrast) const; + double* penalty_shape_computation(int index , int max_nb_segment , 
segment_model *model_type , + bool common_contrast , int penalty_shape_type) const; + double forward_backward(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , double **rank , + double *likelihood , long double *segmentation_entropy , + long double *first_order_entropy , long double *change_point_entropy , + double *uniform_entropy , long double *marginal_entropy) const; + double forward_backward(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , double **rank , + std::ostream *os , stat_tool::MultiPlotSet *plot_set , + double &segment_length_max , change_point_profile output , + stat_tool::output_format format) const; + double forward_backward_sampling(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , double **rank , + std::ostream &os , stat_tool::output_format format , + int nb_segmentation) const; + double N_segmentation(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , double **irank , + std::ostream &os , stat_tool::output_format format , int inb_segmentation , + double likelihood) const; + double forward_backward_dynamic_programming(int index , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + double **rank , std::ostream *os , + stat_tool::MultiPlotSet *plot_set , change_point_profile output , + stat_tool::output_format format , double likelihood = stat_tool::D_INF) const; + + std::ostream& profile_ascii_print(std::ostream &os , int index , int nb_state , + double **profiles , double *begin_conditional_entropy , + double *marginal_entropy , double *begin_partial_entropy , + double *end_conditional_entropy = NULL , double *end_partial_entropy = NULL) const; + std::ostream& profile_spreadsheet_print(std::ostream &os , int index , int nb_state , + double **profiles , double 
*begin_conditional_entropy , + double *marginal_entropy , double *begin_partial_entropy , + double *end_conditional_entropy = NULL , double *end_partial_entropy = NULL) const; + std::ostream& profile_plot_print(std::ostream &os , int index , int nb_state , + double **profiles , double *begin_conditional_entropy , + double *marginal_entropy , double *begin_partial_entropy , + double *end_conditional_entropy = NULL , double *end_partial_entropy = NULL) const; + void profile_plotable_write(stat_tool::MultiPlot &plot , int index , int nb_state , + double **profiles) const; + void segment_length_distribution_plotable_write(stat_tool::MultiPlot &plot , int nb_segment , + double segment_length_max , + stat_tool::Distribution **segment_length , + stat_tool::Distribution *prior_segment_length) const; + void entropy_profile_plotable_write(stat_tool::MultiPlot &plot , int index , double *begin_entropy , + double *end_entropy = NULL , double *marginal_entropy = NULL) const; + + bool segment_profile_write(stat_tool::StatError &error , std::ostream &os , int iidentifier , + int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + change_point_profile output = SEGMENT , + stat_tool::output_format format = stat_tool::ASCII , + stat_tool::latent_structure_algorithm segmentation = stat_tool::FORWARD_DYNAMIC_PROGRAMMING , + int nb_segmentation = NB_SEGMENTATION) const; + + public : + + Sequences(); + Sequences(int inb_sequence , int inb_variable); + Sequences(int inb_sequence , int *iidentifier , int *ilength , + int **ivertex_identifier , index_parameter_type iindex_param_type , int inb_variable , + stat_tool::variable_nature *itype , bool vertex_identifier_copy = true , bool init_flag = false) + { init(inb_sequence , iidentifier , ilength , ivertex_identifier , + iindex_param_type , inb_variable , itype , + vertex_identifier_copy , init_flag); } + Sequences(int inb_sequence , int *iidentifier , int *ilength , index_parameter_type 
iindex_param_type , // AML interface + int inb_variable , stat_tool::variable_nature itype , int ***iint_sequence); + Sequences(int inb_sequence , int *iidentifier , int *ilength , int inb_variable , // AML interface + double ***ireal_sequence); + Sequences(int inb_sequence , int *iidentifier , int *ilength , int **ivertex_identifier , + index_parameter_type iindex_param_type , int **iindex_parameter , int inb_variable , + stat_tool::variable_nature *itype , int ***iint_sequence , double ***ireal_sequence); + Sequences(int inb_sequence , const std::vector &iidentifier , int *ilength , + const std::vector > &ivertex_identifier , index_parameter_type iindex_param_type , + const std::vector > &iindex_parameter , int nb_int_variable , int nb_real_variable , + const std::vector > > &iint_sequence , + const std::vector > > &ireal_sequence); + Sequences(int inb_sequence , int *iidentifier , int *ilength , int inb_variable , + bool init_flag = false) + { init(inb_sequence , iidentifier , ilength , inb_variable , init_flag); } + Sequences(const stat_tool::FrequencyDistribution &ilength_distribution , int inb_variable , + stat_tool::variable_nature *itype , bool init_flag = false); + Sequences(const RenewalData &timev); + Sequences(const Sequences &seq , int variable , stat_tool::variable_nature itype); + Sequences(const Sequences &seq , int inb_sequence , int *index); + Sequences(const Sequences &seq , bool *auxiliary); + Sequences(const Sequences &seq , sequence_transformation transform = SEQUENCE_COPY); + ~Sequences(); + Sequences& operator=(const Sequences &seq); + + stat_tool::DiscreteDistributionData* extract(stat_tool::StatError &error , int variable) const; + + stat_tool::Vectors* build_vectors(bool index_variable) const; + stat_tool::Vectors* extract_vectors(stat_tool::StatError &error , sequence_pattern pattern , + int variable = stat_tool::I_DEFAULT , + int value = stat_tool::I_DEFAULT) const; + + MarkovianSequences* markovian_sequences(stat_tool::StatError 
&error) const; + + bool check(stat_tool::StatError &error , const char *pattern_label); + + TimeEvents* extract_time_events(stat_tool::StatError &error , int variable , + int begin_date , int end_date , int previous_date = stat_tool::I_DEFAULT , + int next_date = stat_tool::I_DEFAULT) const; + RenewalData* extract_renewal_data(stat_tool::StatError &error , int variable , + int begin_index_parameter , int end_index_parameter) const; + + /// Merge several sets of Sequences (arrays of Sequences) + Sequences* merge(stat_tool::StatError &error , int nb_sample , const Sequences **iseq) const; + /// Merge several sets of Sequences (std::vector) + Sequences* merge(stat_tool::StatError &error , int nb_sample , const std::vector &iseq) const; + + Sequences* shift(stat_tool::StatError &error , int variable , int shift_param) const; + Sequences* shift(stat_tool::StatError &error , int variable , double shift_param) const; + Sequences* thresholding(stat_tool::StatError &error , int variable , int threshold , + stat_tool::threshold_direction mode) const; + Sequences* thresholding(stat_tool::StatError &error , int variable , double threshold , + stat_tool::threshold_direction mode) const; + Sequences* cluster(stat_tool::StatError &error , int variable , int step , + stat_tool::rounding mode = stat_tool::FLOOR) const; + Sequences* transcode(stat_tool::StatError &error , int variable , int *category) const; + Sequences* transcode(stat_tool::StatError &error , int variable , std::vector &category) const; + Sequences* cluster(stat_tool::StatError &error , int variable , int nb_class , + int *ilimit) const; + Sequences* cluster(stat_tool::StatError &error , int variable , int nb_class , + std::vector &ilimit) const; + Sequences* cluster(stat_tool::StatError &error , int variable , int nb_class , + double *ilimit) const; + Sequences* cluster(stat_tool::StatError &error , int variable , int nb_class , + std::vector &ilimit) const; + Sequences* scaling(stat_tool::StatError &error , int 
variable , int scaling_coeff) const; + Sequences* scaling(stat_tool::StatError &error , int variable , double scaling_coeff) const; + Sequences* round(stat_tool::StatError &error , int variable = stat_tool::I_DEFAULT , + stat_tool::rounding mode = stat_tool::ROUND) const; + + Sequences* index_parameter_select(stat_tool::StatError &error , std::ostream *os , + int min_index_parameter , + int max_index_parameter , bool keep) const; + Sequences* value_select(stat_tool::StatError &error , std::ostream *os , int variable , + int imin_value , int imax_value , bool keep = true) const; + Sequences* value_select(stat_tool::StatError &error , std::ostream *os , int variable , + double imin_value , double imax_value , bool keep = true) const; + Sequences* select_individual(stat_tool::StatError &error , int inb_sequence , int *iidentifier , + bool keep = true) const; + Sequences* select_individual(stat_tool::StatError &error , int inb_sequence , std::vector &iidentifier , + bool keep = true) const; + /// Add index parameter + void set_index_parameter(stat_tool::StatError &error, int **index_parameter, + index_parameter_type index_param_type); + /// Copy of a Sequences object transforming some given existing variable into an index parameter + Sequences* set_variable_as_index_parameter(stat_tool::StatError &error, int ivariable, + index_parameter_type index_param_type) const; + Sequences* remove_index_parameter(stat_tool::StatError &error) const; + Sequences* explicit_index_parameter(stat_tool::StatError &error) const; + Sequences* select_variable(stat_tool::StatError &error , int inb_variable , int *ivariable , + bool keep = true) const; + Sequences* select_variable(stat_tool::StatError &error , int inb_variable , std::vector &ivariable , + bool keep = true) const; + Sequences* sum_variable(stat_tool::StatError &error , int nb_summed_variable , int *ivariable) const; + Sequences* sum_variable(stat_tool::StatError &error , int nb_summed_variable , std::vector &ivariable) const; 
+ Sequences* merge_variable(stat_tool::StatError &error , int nb_sample , const Sequences **iseq , + int ref_sample = stat_tool::I_DEFAULT) const; + Sequences* merge_variable(stat_tool::StatError &error , int nb_sample , const std::vector &iseq , + int ref_sample = stat_tool::I_DEFAULT) const; + Sequences* difference_variable(stat_tool::StatError &error , const Sequences &residual) const; + Sequences* shift_variable(stat_tool::StatError &error , int variable , int lag) const; + + Sequences* reverse(stat_tool::StatError &error) const; + Sequences* length_select(stat_tool::StatError &error , std::ostream *os , int min_length , + int imax_length , bool keep = true) const; + Sequences* remove_run(stat_tool::StatError &error , int variable , int ivalue , + run_position position , int max_run_length = stat_tool::I_DEFAULT) const; + Sequences* truncate(stat_tool::StatError &error , int max_index_parameter) const; + Sequences* index_parameter_extract(stat_tool::StatError &error , int min_index_parameter , + int max_index_parameter = stat_tool::I_DEFAULT) const; + Sequences* segmentation_extract(stat_tool::StatError &error , int variable , int nb_value , + int *ivalue , bool keep = true , + bool concatenation = false) const; + Sequences* segmentation_extract(stat_tool::StatError &error , int variable , int nb_value , + std::vector &ivalue , bool keep = true , + bool concatenation = false) const; + + Sequences* cumulate(stat_tool::StatError &error , int variable = stat_tool::I_DEFAULT) const; + Sequences* difference(stat_tool::StatError &error , int variable = stat_tool::I_DEFAULT , + bool first_element = false) const; + Sequences* log_transform(stat_tool::StatError &error , int variable = stat_tool::I_DEFAULT , + stat_tool::log_base base = stat_tool::NATURAL) const; + Sequences* relative_growth_rate(stat_tool::StatError &error , double growth_factor = GROWTH_FACTOR) const; + Sequences* sequence_normalization(stat_tool::StatError &error , int variable = stat_tool::I_DEFAULT) 
const; + Sequences* moving_average(stat_tool::StatError &error , int nb_point , double *filter , + int variable = stat_tool::I_DEFAULT , bool begin_end = false , + bool segmentation = false , sequence_type output = TREND) const; + Sequences* moving_average(stat_tool::StatError &error , int nb_point , std::vector &filter , + int variable = stat_tool::I_DEFAULT , bool begin_end = false , + bool segmentation = false , sequence_type output = TREND) const; + Sequences* moving_average(stat_tool::StatError &error , const stat_tool::Distribution &dist , + int variable = stat_tool::I_DEFAULT , bool begin_end = false , + bool segmentation = false , sequence_type output = TREND) const; + + Sequences* pointwise_average(stat_tool::StatError &error , bool robust = false , bool circular = false , + bool dispersion = false , sequence_type output = SEQUENCE , + const std::string path = "" , + stat_tool::output_format format = stat_tool::ASCII) const; + + bool mean_error_computation(stat_tool::StatError &error , std::ostream *os , int variable , + int iidentifier = stat_tool::I_DEFAULT , bool robust = false) const; + + Sequences* recurrence_time_sequences(stat_tool::StatError &error , int variable , int value) const; + Sequences* sojourn_time_sequences(stat_tool::StatError &error , int variable) const; + + Sequences* transform_position(stat_tool::StatError &error , int step) const; + + Sequences* cross(stat_tool::StatError &error) const; + + static Sequences* build(stat_tool::StatError &error , index_parameter_type iindex_param_type , + const std::vector > &iindex_parameter , + const std::vector > > &iint_sequence , + const std::vector > > &ireal_sequence , + const std::vector &iidentifier , const std::vector > &ivertex_identifier); + static Sequences* ascii_read(stat_tool::StatError &error , const std::string path , + bool old_format = false); + + std::ostream& line_write(std::ostream &os) const; + + virtual std::ostream& ascii_data_write(std::ostream &os , output_sequence_format 
format = COLUMN , + bool exhaustive = false) const; + std::string ascii_data_write(output_sequence_format format = COLUMN , bool exhaustive = false) const; + virtual bool ascii_data_write(stat_tool::StatError &error , const std::string path , + output_sequence_format format = COLUMN , bool exhaustive = false) const; + bool plot_data_write(stat_tool::StatError &error , const char *prefix , + const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable_data(stat_tool::StatError &error) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + int min_index_parameter_computation() const; + int max_index_parameter_computation(bool last_position = false) const; + + void marginal_frequency_distribution_computation(int variable); + bool select_bin_width(stat_tool::StatError &error , int variable , double bin_width , + double imin_value = stat_tool::D_INF); + + double mean_computation(int variable) const; + double variance_computation(int variable , double mean) const; + double mean_absolute_deviation_computation(int variable , double location) const; + double mean_absolute_difference_computation(int variable) const; + double skewness_computation(int variable , double mean , double variance) const; + double kurtosis_computation(int variable , double mean , double variance) const; + double* mean_direction_computation(int variable , stat_tool::angle_unit unit) const; + + stat_tool::FrequencyDistribution* value_index_interval_computation(stat_tool::StatError &error , + int variable , int value) const; + + Correlation* correlation_computation(stat_tool::StatError &error , int variable1 , int 
variable2 , + stat_tool::correlation_type itype = stat_tool::PEARSON , + int max_lag = stat_tool::I_DEFAULT , + correlation_normalization normalization = EXACT , + bool individual_mean = false) const; + Correlation* partial_autocorrelation_computation(stat_tool::StatError &error , int variable , + stat_tool::correlation_type itype = stat_tool::PEARSON , + int max_lag = stat_tool::I_DEFAULT) const; + + stat_tool::DistanceMatrix* alignment(stat_tool::StatError &error , std::ostream *os , + const stat_tool::VectorDistance &ivector_dist , + int ref_identifier = stat_tool::I_DEFAULT , int test_identifier = stat_tool::I_DEFAULT , + bool begin_free = false , bool end_free = false , + insertion_deletion_cost indel_cost = ADAPTATIVE , + double indel_factor = INDEL_FACTOR_1 , bool transposition_flag = false , + double transposition_factor = TRANSPOSITION_FACTOR , + const std::string result_path = "" , stat_tool::output_format result_format = stat_tool::ASCII , + const std::string alignment_path = "") const; + stat_tool::DistanceMatrix* alignment(stat_tool::StatError &error , std::ostream *os , + int ref_identifier = stat_tool::I_DEFAULT , int test_identifier = stat_tool::I_DEFAULT , + bool begin_free = false , bool end_free = false , + const std::string result_path = "" , stat_tool::output_format result_format = stat_tool::ASCII , + const std::string alignment_path = "") const; + + Sequences* multiple_alignment(stat_tool::StatError &error , std::ostream *os , + const stat_tool::VectorDistance &ivector_dist , + bool begin_free = false , bool end_free = false , + insertion_deletion_cost indel_cost = ADAPTATIVE , + double indel_factor = INDEL_FACTOR_N , + stat_tool::hierarchical_strategy strategy = stat_tool::AGGLOMERATIVE , + const std::string path = "") const; + + Sequences* segmentation(stat_tool::StatError &error , std::ostream *os , int iidentifier , + int nb_segment , int *ichange_point , segment_model *model_type , + bool common_contrast , double *shape_parameter , + 
sequence_type output = SEQUENCE , bool continuity = false) const; + Sequences* segmentation(stat_tool::StatError &error , std::ostream *os , int iidentifier , + int nb_segment , std::vector &ichange_point , std::vector &model_type , + bool common_contrast , std::vector &shape_parameter , + sequence_type output = SEQUENCE , bool continuity = false) const; + Sequences* segmentation(stat_tool::StatError &error , std::ostream *os , int iidentifier , + int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + sequence_type output , bool continuity = false) const; + Sequences* segmentation(stat_tool::StatError &error , std::ostream *os , int iidentifier , + int nb_segment , std::vector &model_type , + bool common_contrast , std::vector &shape_parameter , + sequence_type output , bool continuity = false) const; + Sequences* segmentation(stat_tool::StatError &error , std::ostream *os , int iidentifier , + int max_nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + stat_tool::model_selection_criterion criterion = stat_tool::LIKELIHOOD_SLOPE , + int min_nb_segment = 0 , int penalty_shape_type = 2 , + sequence_type output = SEQUENCE) const; + Sequences* segmentation(stat_tool::StatError &error , std::ostream *os , int iidentifier , + int max_nb_segment , std::vector &model_type , + bool common_contrast , std::vector &shape_parameter , + stat_tool::model_selection_criterion criterion = stat_tool::LIKELIHOOD_SLOPE , + int min_nb_segment = 0 , int penalty_shape_type = 2 , + sequence_type output = SEQUENCE) const; + +// Sequences* hierarchical_segmentation(stat_tool::StatError &error , std::ostream &os , int iidentifier , +// int max_nb_segment , segment_model *model_type) const; + + bool segment_profile_ascii_write(stat_tool::StatError &error , int iidentifier , + int nb_segment , std::vector &model_type , + bool common_contrast , std::vector &shape_parameter , + change_point_profile output = 
SEGMENT , + stat_tool::latent_structure_algorithm segmentation = stat_tool::FORWARD_DYNAMIC_PROGRAMMING , + int nb_segmentation = NB_SEGMENTATION) const; + bool segment_profile_write(stat_tool::StatError &error , const std::string path , int iidentifier , + int nb_segment , std::vector &model_type , + bool common_contrast , std::vector &shape_parameter , + change_point_profile output = SEGMENT , + stat_tool::output_format format = stat_tool::ASCII , + stat_tool::latent_structure_algorithm segmentation = stat_tool::FORWARD_DYNAMIC_PROGRAMMING , + int nb_segmentation = NB_SEGMENTATION) const; + + bool segment_profile_plot_write(stat_tool::StatError &error , const char *prefix , + int iidentifier , int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + change_point_profile output = SEGMENT , const char *title = NULL) const; + stat_tool::MultiPlotSet* segment_profile_plotable_write(stat_tool::StatError &error , int iidentifier , + int nb_segment , segment_model *model_type , + bool common_contrast , double *shape_parameter , + change_point_profile output = SEGMENT) const; + stat_tool::MultiPlotSet* segment_profile_plotable_write(stat_tool::StatError &error , int iidentifier , + int nb_segment , std::vector &model_type , + bool common_contrast , std::vector &shape_parameter , + change_point_profile output = SEGMENT) const; + + // class member access + + int get_nb_sequence() const { return nb_sequence; } + int get_identifier(int iseq) const { return identifier[iseq]; } + int get_max_length() const { return max_length; } + int get_cumul_length() const { return cumul_length; } + int get_length(int index_seq) const { return length[index_seq]; } + stat_tool::FrequencyDistribution* get_length_distribution() const { return length_distribution; } + int get_vertex_identifier(int iseq , int index) const + { return vertex_identifier[iseq][index]; } + index_parameter_type get_index_param_type() const { return index_param_type; } + 
stat_tool::FrequencyDistribution* get_index_parameter_distribution() const { return index_parameter_distribution; } + stat_tool::FrequencyDistribution* get_index_interval() const { return index_interval; } + /// return index parameter for sequence iseq and given index + int get_index_parameter(int iseq , int index) const + { return index_parameter[iseq][index]; } + /// return whole set of index parameters + int** get_index_parameter() const + { return index_parameter; } + int get_nb_variable() const { return nb_variable; } + stat_tool::variable_nature get_type(int variable) const { return type[variable]; } + double get_min_value(int variable) const { return min_value[variable]; } + double get_max_value(int variable) const { return max_value[variable]; } + stat_tool::FrequencyDistribution* get_marginal_distribution(int variable) const + { return marginal_distribution[variable]; } + stat_tool::Histogram* get_marginal_histogram(int variable) const + { return marginal_histogram[variable]; } + int get_int_sequence(int iseq , int variable , int index) const + { return int_sequence[iseq][variable][index]; } + double get_real_sequence(int iseq , int variable , int index) const + { return real_sequence[iseq][variable][index]; } + int** get_int_sequence(int iseq) const + { return int_sequence[iseq]; } + double** get_real_sequence(int iseq) const + { return real_sequence[iseq]; } + }; + + + /// \brief Sequence characteristics for a categorical variable (index-value curves and frequency distributions of first occurrence, recurrence time, sojourn time, censored runs, number of runs and number of occurrences) + + class SequenceCharacteristics { + + public : + + int nb_value; ///< number of categories + stat_tool::Curves *index_value; ///< empirical probabilities of each category as a function of the index parameter + stat_tool::Curves *explicit_index_value; ///< empirical probabilities of each category as a function of the explicit index parameter + stat_tool::FrequencyDistribution **first_occurrence; ///< time to the 1st occurrence frequency distributions + stat_tool::FrequencyDistribution **recurrence_time; ///< recurrence time 
frequency distributions + stat_tool::FrequencyDistribution **sojourn_time; ///< complete sojourn time frequency distributions + stat_tool::FrequencyDistribution **initial_run; ///< left-censored sojourn time frequency distributions + stat_tool::FrequencyDistribution **final_run; ///< right-censored sojourn time frequency distributions + stat_tool::FrequencyDistribution **nb_run; ///< number of runs per sequence frequency distributions + stat_tool::FrequencyDistribution **nb_occurrence; ///< number of occurrences per sequence frequency distributions + + void copy(const SequenceCharacteristics &characteristics); + void reverse(const SequenceCharacteristics &characteristics); + void remove(); + + void create_sojourn_time_frequency_distribution(int max_length , int initial_run_flag = false); + + std::ostream& ascii_print(std::ostream &os , int type , + const stat_tool::FrequencyDistribution &length_distribution , + bool exhaustive , bool comment_flag) const; + std::ostream& spreadsheet_print(std::ostream &os , int type , + const stat_tool::FrequencyDistribution &length_distribution) const; + bool plot_print(const char *prefix , const char *title , int variable , + int nb_variable , int type , const stat_tool::FrequencyDistribution &length_distribution) const; + void plotable_write(stat_tool::MultiPlotSet &plot , int &index , int variable , + int type , const stat_tool::FrequencyDistribution &length_distribution) const; + + SequenceCharacteristics(int inb_value = stat_tool::I_DEFAULT); + SequenceCharacteristics(const SequenceCharacteristics &characteristics , + bool initial_run_flag); + SequenceCharacteristics(const SequenceCharacteristics &characteristics , + sequence_transformation transform = SEQUENCE_COPY); + ~SequenceCharacteristics(); + SequenceCharacteristics& operator=(const SequenceCharacteristics &characteristics); + }; + + + class Function; + + /// \brief Self-transition probability as a function of the index parameter + + class SelfTransition : public 
stat_tool::Curves { + + public : + + SelfTransition(int ilength) + :stat_tool::Curves(1 , ilength , true , false) {} + + Function* monomolecular_regression() const; + Function* logistic_regression() const; + }; + + + class VariableOrderMarkovChain; + class VariableOrderMarkovChainData; + + /// \brief Sequences potentially generated by a (hidden) Markovian process + + class MarkovianSequences : public Sequences { + + friend class VariableOrderMarkovChain; + friend class VariableOrderMarkov; + friend class HiddenVariableOrderMarkov; + friend class SemiMarkov; + friend class HiddenSemiMarkov; + friend class NonhomogeneousMarkov; + + friend std::ostream& operator<<(std::ostream &os , const MarkovianSequences &seq) + { return seq.ascii_write(os); } + + protected : + + double *min_interval; ///< minimum intervals between 2 values + SelfTransition **self_transition; ///< self transition probability as a function of the index parameter + stat_tool::FrequencyDistribution ***observation_distribution; ///< observation frequency distributions + stat_tool::Histogram ***observation_histogram; ///< observation histograms + SequenceCharacteristics **characteristics; ///< characteristics for categorical variables + + void init(); + void copy(const MarkovianSequences &seq , initial_run param = UNCHANGED); + void reverse(const MarkovianSequences &seq); + void add_state_variable(const MarkovianSequences &seq , initial_run param); + void remove(); + + MarkovianSequences* transcode(stat_tool::StatError &error , + const CategoricalSequenceProcess *process) const; + MarkovianSequences* build_auxiliary_variable(stat_tool::DiscreteParametricProcess **discrete_process , + stat_tool::ContinuousParametricProcess **continuous_process) const; + MarkovianSequences* residual_sequences(CategoricalSequenceProcess **categorical_process , + stat_tool::DiscreteParametricProcess **discrete_process , + stat_tool::ContinuousParametricProcess **continuous_process) const; + + MarkovianSequences* 
remove_variable_1() const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive , bool comment_flag) const; + bool plot_print(const char *prefix , const char *title , int variable , + int nb_variable) const; + void plotable_write(stat_tool::MultiPlotSet &plot , int &index , int variable) const; + + void state_variable_init(stat_tool::variable_nature itype = stat_tool::STATE); + + void min_interval_computation(int variable); + + void transition_count_computation(const VariableOrderMarkovChainData &chain_data , + const VariableOrderMarkovChain &markov , + bool begin = true , bool non_terminal = false) const; + void transition_count_computation(const stat_tool::ChainData &chain_data , + const SemiMarkov *smarkov = NULL) const; + + void self_transition_computation(int state); + stat_tool::Distribution* weight_computation() const; + void observation_frequency_distribution_computation(int variable , int nb_state); + bool test_hidden(int variable) const; + + void build_index_value(int variable); + void build_explicit_index_value(int variable); + void build_first_occurrence_frequency_distribution(int variable); + void build_recurrence_time_frequency_distribution(int variable); + void build_sojourn_time_frequency_distribution(int variable , int initial_run_flag = false); + void build_nb_run_frequency_distribution(int variable); + void build_nb_occurrence_frequency_distribution(int variable); + + void censored_sojourn_time_frequency_distribution_computation(stat_tool::FrequencyDistribution **initial_run , + stat_tool::FrequencyDistribution **final_run , + stat_tool::FrequencyDistribution **single_run) const; + + std::ostream& linear_model_spreadsheet_print(std::ostream &os , int variable , + stat_tool::ContinuousParametricProcess *process) const; + bool linear_model_plot_print(const char *prefix , const char *title , int variable , + stat_tool::ContinuousParametricProcess *process) const; + void linear_model_plotable_write(stat_tool::MultiPlotSet &plot , int 
&index , int variable , + stat_tool::ContinuousParametricProcess *process) const; + + void autocorrelation_computation(Correlation &correl , int state , int variable) const; + std::ostream& autoregressive_model_ascii_print(std::ostream &os , int variable , + stat_tool::ContinuousParametricProcess *process , + bool file_flag) const; + std::ostream& autoregressive_model_spreadsheet_print(std::ostream &os , int variable , + stat_tool::ContinuousParametricProcess *process) const; + bool autoregressive_model_plot_print(const char *prefix , const char *title , int variable , + stat_tool::ContinuousParametricProcess *process) const; + void autoregressive_model_plotable_write(stat_tool::MultiPlotSet &plot , int &index , int variable , + stat_tool::ContinuousParametricProcess *process) const; + + template + void gamma_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process , int iter) const; + template + void zero_inflated_gamma_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process , int iter) const; + template + void inverse_gaussian_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process) const; + template + void gaussian_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process) const; + template + void von_mises_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process) const; + template + void linear_model_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process) const; + template + void autoregressive_model_estimation(Type ***state_sequence_count , int variable , + stat_tool::ContinuousParametricProcess *process) const; + + std::ostream& likelihood_write(std::ostream &os , int nb_model , double **likelihood , + const char *label , bool exhaustive = false , + 
stat_tool::latent_structure_algorithm algorithm = stat_tool::NO_LATENT_STRUCTURE) const; + bool likelihood_write(stat_tool::StatError &error , const std::string path , int nb_model , + double **likelihood , const char *label , + stat_tool::latent_structure_algorithm algorithm = stat_tool::NO_LATENT_STRUCTURE) const; + + public : + + MarkovianSequences(); + MarkovianSequences(int inb_sequence , int *iidentifier , int *ilength , + int **ivertex_identifier , index_parameter_type iindex_param_type , int inb_variable , + stat_tool::variable_nature *itype , bool vertex_identifier_copy = true , bool init_flag = false); + MarkovianSequences(const stat_tool::FrequencyDistribution &ilength_distribution , int inb_variable , + stat_tool::variable_nature *itype , bool init_flag = false); + MarkovianSequences(const MarkovianSequences &seq , int variable , stat_tool::variable_nature itype); + MarkovianSequences(const Sequences &seq); + MarkovianSequences(const MarkovianSequences &seq , bool *auxiliary); + MarkovianSequences(const MarkovianSequences &seq , sequence_transformation transform = SEQUENCE_COPY , + initial_run param = UNCHANGED); + ~MarkovianSequences(); + MarkovianSequences& operator=(const MarkovianSequences &seq); + + stat_tool::DiscreteDistributionData* extract(stat_tool::StatError &error , stat_tool::process_distribution type , + int variable , int value) const; + + + /// Merge several sets of MarkovianSequences (arrays of MarkovianSequences) + MarkovianSequences* merge(stat_tool::StatError &error , int nb_sample , + const MarkovianSequences **iseq) const; + /// Merge several sets of MarkovianSequences (std::vector) + MarkovianSequences* merge(stat_tool::StatError &error , int nb_sample , + const std::vector &iseq) const; + + MarkovianSequences* cluster(stat_tool::StatError &error , int variable , int step , + stat_tool::rounding mode = stat_tool::FLOOR) const; + MarkovianSequences* transcode(stat_tool::StatError &error , int ivariable , int *category , + bool 
add_variable = false) const; + MarkovianSequences* transcode(stat_tool::StatError &error , int ivariable , std::vector &category , + bool add_variable = false) const; + MarkovianSequences* consecutive_values(stat_tool::StatError &error , std::ostream *os , + int ivariable , bool add_variable = false) const; + MarkovianSequences* cluster(stat_tool::StatError &error , int ivariable , int nb_class , + int *ilimit , bool add_variable = false) const; + MarkovianSequences* cluster(stat_tool::StatError &error , int ivariable , int nb_class , + std::vector &ilimit , bool add_variable = false) const; + MarkovianSequences* cluster(stat_tool::StatError &error , int variable , int nb_class , + double *ilimit) const; + MarkovianSequences* cluster(stat_tool::StatError &error , int variable , int nb_class , + std::vector &ilimit) const; + + MarkovianSequences* remove_index_parameter(stat_tool::StatError &error) const; + MarkovianSequences* explicit_index_parameter(stat_tool::StatError &error) const; + MarkovianSequences* select_variable(stat_tool::StatError &error , int inb_variable , + int *ivariable , bool keep = true) const; + MarkovianSequences* select_variable(stat_tool::StatError &error , int inb_variable , + std::vector &ivariable , bool keep = true) const; + MarkovianSequences* merge_variable(stat_tool::StatError &error , int nb_sample , + const MarkovianSequences **iseq , + int ref_sample = stat_tool::I_DEFAULT) const; + MarkovianSequences* merge_variable(stat_tool::StatError &error , int nb_sample , + const std::vector &iseq , + int ref_sample = stat_tool::I_DEFAULT) const; + + MarkovianSequences* initial_run_computation(stat_tool::StatError &error) const; + MarkovianSequences* add_absorbing_run(stat_tool::StatError &error , + int run_length = stat_tool::I_DEFAULT , + int sequence_length = stat_tool::I_DEFAULT , + bool add_variable = false) const; + + MarkovianSequences* split(stat_tool::StatError &error , int step) const; + + std::ostream& ascii_data_write(std::ostream 
&os , output_sequence_format format = COLUMN , + bool exhaustive = false) const; + std::string ascii_data_write(output_sequence_format format = COLUMN , bool exhaustive = false) const; + bool ascii_data_write(stat_tool::StatError &error , const std::string path , + output_sequence_format format = COLUMN , bool exhaustive = false) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + bool transition_count(stat_tool::StatError &error , std::ostream *os , int max_order , + bool begin = false , transition_estimator estimator = MAXIMUM_LIKELIHOOD , + const std::string path = "") const; + bool word_count(stat_tool::StatError &error , std::ostream *os , int variable , int word_length , + int begin_state = stat_tool::I_DEFAULT , int end_state = stat_tool::I_DEFAULT , + int min_frequency = 1) const; + bool mtg_write(stat_tool::StatError &error , const std::string path , stat_tool::variable_type *itype) const; + + int cumulative_distribution_function_computation(int variable , double **cdf) const; + int cumulative_distribution_function_computation(int variable , int state , double **cdf) const; + + double iid_information_computation() const; + + void self_transition_computation(); + void self_transition_computation(bool *homogeneity); + void sojourn_time_frequency_distribution_computation(int variable); + + void build_observation_frequency_distribution(int nb_state); + void build_observation_histogram(int variable , int nb_state , double bin_width = stat_tool::D_DEFAULT); + void build_observation_histogram(int nb_state); + bool select_bin_width(stat_tool::StatError &error , int variable , double 
bin_width , + double imin_value = stat_tool::D_INF); + + void build_characteristic(int variable = stat_tool::I_DEFAULT , bool sojourn_time_flag = true , + bool initial_run_flag = false); + + NonhomogeneousMarkov* nonhomogeneous_markov_estimation(stat_tool::StatError &error , stat_tool::parametric_function *ident , + bool counting_flag = true) const; + + VariableOrderMarkov* variable_order_markov_estimation(stat_tool::StatError &error , std::ostream *os , + stat_tool::process_type itype , int min_order = 0 , + int max_order = stat_tool::ORDER , + memory_tree_selection algorithm = LOCAL_BIC , + double threshold = LOCAL_BIC_THRESHOLD , + transition_estimator estimator = LAPLACE , + bool global_initial_transition = true , + bool global_sample = true , + bool counting_flag = true) const; + VariableOrderMarkov* variable_order_markov_estimation(stat_tool::StatError &error , + const VariableOrderMarkov &imarkov , + bool global_initial_transition = true , + bool counting_flag = true) const; + VariableOrderMarkov* variable_order_markov_estimation(stat_tool::StatError &error , + stat_tool::process_type itype , int order = 1 , + bool global_initial_transition = true , + bool counting_flag = true) const; + + VariableOrderMarkov* lumpability_estimation(stat_tool::StatError &error , std::ostream *os , int *category , + stat_tool::model_selection_criterion criterion = stat_tool::BIC , + int order = 1 , bool counting_flag = true) const; + + SemiMarkov* semi_markov_estimation(stat_tool::StatError &error , std::ostream *os , stat_tool::process_type itype , + stat_tool::censoring_estimator estimator = stat_tool::COMPLETE_LIKELIHOOD , + bool counting_flag = true , int nb_iter = stat_tool::I_DEFAULT , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED) const; + + HiddenVariableOrderMarkov* hidden_variable_order_markov_estimation(stat_tool::StatError &error , std::ostream *os , + const HiddenVariableOrderMarkov &ihmarkov , + bool 
global_initial_transition = true , + bool common_dispersion = false , + bool counting_flag = true , + bool state_sequence = true , + int nb_iter = stat_tool::I_DEFAULT) const; + HiddenVariableOrderMarkov* hidden_variable_order_markov_stochastic_estimation(stat_tool::StatError &error , std::ostream *os , + const HiddenVariableOrderMarkov &ihmarkov , + bool global_initial_transition = true , + bool common_dispersion = false , + int min_nb_state_sequence = MIN_NB_STATE_SEQUENCE , + int max_nb_state_sequence = MAX_NB_STATE_SEQUENCE , + double parameter = NB_STATE_SEQUENCE_PARAMETER , + bool counting_flag = true , + bool state_sequence = true , + int nb_iter = stat_tool::I_DEFAULT) const; + + HiddenSemiMarkov* hidden_semi_markov_estimation(stat_tool::StatError &error , std::ostream *os , + const HiddenSemiMarkov &ihsmarkov , + bool geometric_poisson = false , + bool common_dispersion = false , + stat_tool::censoring_estimator estimator = stat_tool::COMPLETE_LIKELIHOOD , + bool counting_flag = true , + bool state_sequence = true , + int nb_iter = stat_tool::I_DEFAULT , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED) const; + HiddenSemiMarkov* hidden_semi_markov_estimation(stat_tool::StatError &error , std::ostream *os , + stat_tool::process_type itype , int nb_state , bool left_right , + double occupancy_mean = stat_tool::D_DEFAULT , + bool geometric_poisson = false , + bool common_dispersion = false , + stat_tool::censoring_estimator estimator = stat_tool::COMPLETE_LIKELIHOOD , + bool counting_flag = true , + bool state_sequence = true , + int nb_iter = stat_tool::I_DEFAULT , + stat_tool::duration_distribution_mean_estimator mean_estimator = stat_tool::COMPUTED) const; + HiddenSemiMarkov* hidden_semi_markov_stochastic_estimation(stat_tool::StatError &error , std::ostream *os , + const HiddenSemiMarkov &ihsmarkov , + bool geometric_poisson = false , + bool common_dispersion = false , + int min_nb_state_sequence = 
MIN_NB_STATE_SEQUENCE , + int max_nb_state_sequence = MAX_NB_STATE_SEQUENCE , + double parameter = NB_STATE_SEQUENCE_PARAMETER , + stat_tool::censoring_estimator estimator = stat_tool::COMPLETE_LIKELIHOOD , + bool counting_flag = true , + bool state_sequence = true , + int nb_iter = stat_tool::I_DEFAULT) const; + HiddenSemiMarkov* hidden_semi_markov_stochastic_estimation(stat_tool::StatError &error , std::ostream *os , + stat_tool::process_type itype , int nb_state , bool left_right , + double occupancy_mean = stat_tool::D_DEFAULT , + bool geometric_poisson = false , + bool common_dispersion = false , + int min_nb_state_sequence = MIN_NB_STATE_SEQUENCE , + int max_nb_state_sequence = MAX_NB_STATE_SEQUENCE , + double parameter = NB_STATE_SEQUENCE_PARAMETER , + stat_tool::censoring_estimator estimator = stat_tool::COMPLETE_LIKELIHOOD , + bool counting_flag = true , + bool state_sequence = true , + int nb_iter = stat_tool::I_DEFAULT) const; + + bool lumpability_test(stat_tool::StatError &error , std::ostream &os , int *category , int order = 1) const; + + bool comparison(stat_tool::StatError &error , std::ostream *os , int nb_model , + const VariableOrderMarkov **imarkov , const std::string path = "") const; + + bool comparison(stat_tool::StatError &error , std::ostream *os , int nb_model , + const SemiMarkov **ismarkov , const std::string path = "") const; + + bool comparison(stat_tool::StatError &error , std::ostream *os , int nb_model , + const HiddenVariableOrderMarkov **ihmarkov , + stat_tool::latent_structure_algorithm algorithm = stat_tool::FORWARD , + const std::string path = "") const; + + bool comparison(stat_tool::StatError &error , std::ostream *os , int nb_model , + const HiddenSemiMarkov **ihsmarkov , + stat_tool::latent_structure_algorithm algorithm = stat_tool::FORWARD , + const std::string path = "") const; + + // class member access + + stat_tool::Curves* get_self_transition(int state) const { return self_transition[state]; } + 
stat_tool::FrequencyDistribution*** get_observation_distribution() const + { return observation_distribution; } + stat_tool::FrequencyDistribution** get_observation_distribution(int variable) const + { return observation_distribution[variable]; } + stat_tool::FrequencyDistribution* get_observation_distribution(int variable , int state) const + { return observation_distribution[variable][state]; } + stat_tool::Histogram*** get_observation_histogram() const { return observation_histogram; } + stat_tool::Histogram** get_observation_histogram(int variable) const + { return observation_histogram[variable]; } + stat_tool::Histogram* get_observation_histogram(int variable , int state) const + { return observation_histogram[variable][state]; } + SequenceCharacteristics* get_characteristics(int variable) const + { return characteristics[variable]; } + }; + + +}; // namespace sequence_analysis + + + +#include "continuous_parametric_sequence_estimation.hpp" + + + +#endif diff --git a/src/cpp/sequence_analysis/sequences1.cpp b/src/cpp/sequence_analysis/sequences1.cpp new file mode 100644 index 0000000..bbfa9d8 --- /dev/null +++ b/src/cpp/sequence_analysis/sequences1.cpp @@ -0,0 +1,3208 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include "stat_tool/stat_label.h" + +#include "renewal.h" +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the Sequences class. + */ +/*--------------------------------------------------------------*/ + +Sequences::Sequences() + +{ + nb_sequence = 0; + identifier = NULL; + + max_length = 0; + cumul_length = 0; + length = NULL; + length_distribution = NULL; + + vertex_identifier = NULL; + + index_param_type = IMPLICIT_TYPE; + index_parameter_distribution = NULL; + index_interval = NULL; + index_parameter = NULL; + + nb_variable = 0; + + type = NULL; + min_value = NULL; + max_value = NULL; + marginal_distribution = NULL; + marginal_histogram = NULL; + + int_sequence = NULL; + real_sequence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Sequences class. + * + * \param[in] inb_sequence number of sequences, + * \param[in] inb_variable number of variables. 
+ */
+/*--------------------------------------------------------------*/
+
+Sequences::Sequences(int inb_sequence , int inb_variable)
+
+{
+  nb_sequence = inb_sequence;
+
+  // sequences are numbered from 1 and start out with a null length
+  identifier = new int[nb_sequence];
+  length = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    identifier[s] = s + 1;
+    length[s] = 0;
+  }
+
+  max_length = 0;
+  cumul_length = 0;
+  length_distribution = NULL;
+
+  vertex_identifier = NULL;
+
+  // no explicit index parameter
+  index_param_type = IMPLICIT_TYPE;
+  index_parameter_distribution = NULL;
+  index_interval = NULL;
+  index_parameter = NULL;
+
+  nb_variable = inb_variable;
+
+  type = new variable_nature[nb_variable];
+  min_value = new double[nb_variable];
+  max_value = new double[nb_variable];
+  marginal_distribution = new FrequencyDistribution*[nb_variable];
+  marginal_histogram = new Histogram*[nb_variable];
+
+  // every variable defaults to an integer-valued variable without statistics
+  for (int v = 0; v < nb_variable; v++) {
+    type[v] = INT_VALUE;
+    min_value[v] = 0.;
+    max_value[v] = 0.;
+    marginal_distribution[v] = NULL;
+    marginal_histogram[v] = NULL;
+  }
+
+  // only the per-sequence tables of variable pointers are allocated;
+  // the value arrays themselves are left unallocated
+  int_sequence = new int**[nb_sequence];
+  real_sequence = new double**[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    int_sequence[s] = new int*[nb_variable];
+    real_sequence[s] = new double*[nb_variable];
+
+    for (int v = 0; v < nb_variable; v++) {
+      int_sequence[s][v] = NULL;
+      real_sequence[s][v] = NULL;
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Initialization of a Sequences object.
+ *
+ * \param[in] inb_sequence number of sequences,
+ * \param[in] iidentifier sequence identifiers,
+ * \param[in] ilength sequence lengths,
+ * \param[in] ivertex_identifier vertex identifiers of the associated MTG,
+ * \param[in] iindex_param_type index parameter type,
+ * \param[in] inb_variable number of variables,
+ * \param[in] itype variable types,
+ * \param[in] vertex_identifier_copy flag copy of vertex identifiers,
+ * \param[in] init_flag flag initialization.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::init(int inb_sequence , int *iidentifier , int *ilength ,
+                     int **ivertex_identifier , index_parameter_type iindex_param_type ,
+                     int inb_variable , variable_nature *itype , bool vertex_identifier_copy ,
+                     bool init_flag)
+
+{
+  nb_sequence = inb_sequence;
+
+  // identifiers: the supplied ones, or 1..nb_sequence when none are given
+  identifier = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    identifier[s] = (iidentifier ? iidentifier[s] : s + 1);
+  }
+
+  length = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    length[s] = ilength[s];
+  }
+
+  max_length_computation();
+  cumul_length_computation();
+  build_length_frequency_distribution();
+
+  // vertex identifiers are allocated only when the caller provides some;
+  // their values are copied only if vertex_identifier_copy is set
+  vertex_identifier = NULL;
+  if (ivertex_identifier) {
+    vertex_identifier = new int*[nb_sequence];
+    for (int s = 0; s < nb_sequence; s++) {
+      vertex_identifier[s] = new int[length[s]];
+
+      if (!vertex_identifier_copy) {
+        continue;
+      }
+      for (int t = 0; t < length[s]; t++) {
+        vertex_identifier[s][t] = ivertex_identifier[s][t];
+      }
+    }
+  }
+
+  index_param_type = iindex_param_type;
+  index_parameter_distribution = NULL;
+  index_interval = NULL;
+  index_parameter = NULL;
+
+  if (index_param_type != IMPLICIT_TYPE) {
+    // POSITION and POSITION_INTERVAL get one slot more than the sequence length
+    const bool extra_slot = (index_param_type == POSITION) ||
+                            (index_param_type == POSITION_INTERVAL);
+
+    index_parameter = new int*[nb_sequence];
+    for (int s = 0; s < nb_sequence; s++) {
+      const int blength = (extra_slot ? length[s] + 1 : length[s]);
+      index_parameter[s] = new int[blength];
+
+      if (init_flag) {
+        for (int t = 0; t < blength; t++) {
+          index_parameter[s][t] = 0;
+        }
+      }
+    }
+  }
+
+  nb_variable = inb_variable;
+
+  type = new variable_nature[nb_variable];
+  min_value = new double[nb_variable];
+  max_value = new double[nb_variable];
+  marginal_distribution = new FrequencyDistribution*[nb_variable];
+  marginal_histogram = new Histogram*[nb_variable];
+
+  for (int v = 0; v < nb_variable; v++) {
+    type[v] = itype[v];
+    min_value[v] = 0.;
+    max_value[v] = 0.;
+    marginal_distribution[v] = NULL;
+    marginal_histogram[v] = NULL;
+  }
+
+  // each variable is stored either as integers or as reals, never both:
+  // REAL_VALUE and AUXILIARY variables use real_sequence, all others int_sequence
+  int_sequence = new int**[nb_sequence];
+  real_sequence = new double**[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    int_sequence[s] = new int*[nb_variable];
+    real_sequence[s] = new double*[nb_variable];
+
+    for (int v = 0; v < nb_variable; v++) {
+      const bool real_valued = (type[v] == REAL_VALUE) || (type[v] == AUXILIARY);
+
+      if (real_valued) {
+        int_sequence[s][v] = NULL;
+        real_sequence[s][v] = new double[length[s]];
+
+        if (init_flag) {
+          for (int t = 0; t < length[s]; t++) {
+            real_sequence[s][v][t] = 0.;
+          }
+        }
+      }
+
+      else {
+        int_sequence[s][v] = new int[length[s]];
+        real_sequence[s][v] = NULL;
+
+        if (init_flag) {
+          for (int t = 0; t < length[s]; t++) {
+            int_sequence[s][v][t] = 0;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Initialization of a Sequences object.
+ *
+ * \param[in] inb_sequence number of sequences,
+ * \param[in] iidentifier sequence identifiers,
+ * \param[in] ilength sequence lengths,
+ * \param[in] inb_variable number of variables,
+ * \param[in] init_flag flag initialization.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::init(int inb_sequence , int *iidentifier , int *ilength ,
+                     int inb_variable , bool init_flag)
+
+{
+  nb_sequence = inb_sequence;
+
+  // identifiers: the supplied ones, or 1..nb_sequence when none are given
+  identifier = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    identifier[s] = (iidentifier ? iidentifier[s] : s + 1);
+  }
+
+  length = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    length[s] = ilength[s];
+  }
+
+  max_length_computation();
+  cumul_length_computation();
+  build_length_frequency_distribution();
+
+  vertex_identifier = NULL;
+
+  // no explicit index parameter
+  index_param_type = IMPLICIT_TYPE;
+  index_parameter_distribution = NULL;
+  index_interval = NULL;
+  index_parameter = NULL;
+
+  nb_variable = inb_variable;
+
+  type = new variable_nature[nb_variable];
+  min_value = new double[nb_variable];
+  max_value = new double[nb_variable];
+  marginal_distribution = new FrequencyDistribution*[nb_variable];
+  marginal_histogram = new Histogram*[nb_variable];
+
+  // all the variables are integer-valued
+  for (int v = 0; v < nb_variable; v++) {
+    type[v] = INT_VALUE;
+    min_value[v] = 0.;
+    max_value[v] = 0.;
+    marginal_distribution[v] = NULL;
+    marginal_histogram[v] = NULL;
+  }
+
+  int_sequence = new int**[nb_sequence];
+  real_sequence = new double**[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    int_sequence[s] = new int*[nb_variable];
+    real_sequence[s] = new double*[nb_variable];
+
+    for (int v = 0; v < nb_variable; v++) {
+      int_sequence[s][v] = new int[length[s]];
+      real_sequence[s][v] = NULL;
+
+      if (!init_flag) {
+        continue;
+      }
+      for (int t = 0; t < length[s]; t++) {
+        int_sequence[s][v][t] = 0;
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the Sequences class.
+ * + * \param[in] inb_sequence number of sequences, + * \param[in] iidentifier sequence identifiers, + * \param[in] ilength sequence lengths, + * \param[in] iindex_param_type index parameter type (TIME/POSITION), + * \param[in] inb_variable number of variables, + * \param[in] itype variable type, + * \param[in] iint_sequence (index parameters and) integer-valued sequences. + */ +/*--------------------------------------------------------------*/ + +Sequences::Sequences(int inb_sequence , int *iidentifier , int *ilength , + index_parameter_type iindex_param_type , int inb_variable , + variable_nature itype , int ***iint_sequence) +: identifier(NULL), + max_length(0), + cumul_length(0), + length_distribution(NULL), + vertex_identifier(NULL), + index_parameter_distribution(NULL), + index_interval(NULL), + index_parameter(NULL), + type(NULL), + min_value(NULL), + max_value(NULL), + marginal_distribution(NULL), + marginal_histogram(NULL), + real_sequence(NULL) +{ + int i , j , k; + int *pisequence , *cisequence; + variable_nature *btype; + + + btype = new variable_nature[inb_variable]; + for (i = 0;i < inb_variable;i++) { + btype[i] = itype; + } + + init(inb_sequence , iidentifier , ilength , NULL , iindex_param_type , + inb_variable , btype , false , false); + delete [] btype; + +// if (index_param_type != IMPLICIT_TYPE) { + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? 
length[i] + 1 : length[i]);j++) { + index_parameter[i][j] = iint_sequence[i][0][j]; + } + } + + build_index_parameter_frequency_distribution(); + +// if ((index_param_type == TIME) || ((index_param_type == POSITION) && +// (type[0] != NB_INTERNODE))) { + if ((index_param_type == TIME) || (index_param_type == POSITION)) { + index_interval_computation(); + } + } + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pisequence = int_sequence[i][j]; + if (index_parameter) { + cisequence = iint_sequence[i][j + 1]; + } + else { + cisequence = iint_sequence[i][j]; + } + + for (k = 0;k < length[i];k++) { + *pisequence++ = *cisequence++; + } + } + } + + for (i = 0;i < nb_variable;i++) { + min_value_computation(i); + max_value_computation(i); + + build_marginal_frequency_distribution(i); + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Sequences class. + * + * \param[in] inb_sequence number of sequences, + * \param[in] iidentifier sequence identifiers, + * \param[in] ilength sequence lengths, + * \param[in] inb_variable number of variables, + * \param[in] ireal_sequence real-valued sequences. 
+ */
+/*--------------------------------------------------------------*/
+
+Sequences::Sequences(int inb_sequence , int *iidentifier , int *ilength ,
+                     int inb_variable , double ***ireal_sequence)
+: identifier(NULL),
+  max_length(0),
+  cumul_length(0),
+  length_distribution(NULL),
+  vertex_identifier(NULL),
+  index_parameter_distribution(NULL),
+  index_interval(NULL),
+  index_parameter(NULL),
+  type(NULL),
+  min_value(NULL),
+  max_value(NULL),
+  marginal_distribution(NULL),
+  marginal_histogram(NULL),
+  int_sequence(NULL)
+
+{
+  // all the variables are real-valued
+  variable_nature *real_type = new variable_nature[inb_variable];
+  for (int v = 0; v < inb_variable; v++) {
+    real_type[v] = REAL_VALUE;
+  }
+
+  init(inb_sequence , iidentifier , ilength , NULL , IMPLICIT_TYPE ,
+       inb_variable , real_type , false , false);
+  delete [] real_type;
+
+  for (int s = 0; s < nb_sequence; s++) {
+    for (int v = 0; v < nb_variable; v++) {
+      double *target = real_sequence[s][v];
+      const double *source = ireal_sequence[s][v];
+
+      for (int t = 0; t < length[s]; t++) {
+        target[t] = source[t];
+      }
+    }
+  }
+
+  for (int v = 0; v < nb_variable; v++) {
+    min_value_computation(v);
+    max_value_computation(v);
+
+    build_marginal_histogram(v);
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the Sequences class.
+ *
+ * \param[in] inb_sequence number of sequences,
+ * \param[in] iidentifier sequence identifiers,
+ * \param[in] ilength sequence lengths,
+ * \param[in] ivertex_identifier vertex identifiers of the associated MTG,
+ * \param[in] iindex_param_type index parameter type (TIME/POSITION),
+ * \param[in] iindex_parameter index parameters,
+ * \param[in] inb_variable number of variables,
+ * \param[in] itype variable type,
+ * \param[in] iint_sequence integer-valued sequences,
+ * \param[in] ireal_sequence real-valued sequences.
+ */
+/*--------------------------------------------------------------*/
+
+Sequences::Sequences(int inb_sequence , int *iidentifier , int *ilength ,
+                     int **ivertex_identifier , index_parameter_type iindex_param_type ,
+                     int **iindex_parameter , int inb_variable , variable_nature *itype ,
+                     int ***iint_sequence , double ***ireal_sequence)
+: identifier(NULL),
+  max_length(0),
+  cumul_length(0),
+  length_distribution(NULL),
+  vertex_identifier(NULL),
+  index_parameter_distribution(NULL),
+  index_interval(NULL),
+  index_parameter(NULL),
+  type(NULL),
+  min_value(NULL),
+  max_value(NULL),
+  marginal_distribution(NULL),
+  marginal_histogram(NULL),
+  int_sequence(NULL),
+  real_sequence(NULL)
+{
+  int i , j , k , m , n;
+
+
+  // init() allocates every member array (min_value and max_value included)
+  // and copies the vertex identifiers
+  init(inb_sequence , iidentifier , ilength , ivertex_identifier ,
+       iindex_param_type , inb_variable , itype , true , false);
+
+  // index_parameter is allocated by init() only for an explicit index parameter
+  if (index_parameter) {
+    for (i = 0;i < nb_sequence;i++) {
+      for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) {
+        index_parameter[i][j] = iindex_parameter[i][j];
+      }
+    }
+
+    build_index_parameter_frequency_distribution();
+
+    if ((index_param_type == TIME) || (index_param_type == POSITION)) {
+      index_interval_computation();
+    }
+  }
+
+  // iint_sequence and ireal_sequence are indexed separately:
+  // i counts the integer-valued variables copied so far, j the real-valued ones.
+  // NOTE(review): variables of any other type are not copied here - confirm
+  // that callers only pass INT_VALUE / REAL_VALUE variables
+  i = 0;
+  j = 0;
+  for (k = 0;k < nb_variable;k++) {
+    switch (type[k]) {
+
+    case INT_VALUE : {
+      for (m = 0;m < nb_sequence;m++) {
+        for (n = 0;n < length[m];n++) {
+          int_sequence[m][k][n] = iint_sequence[m][i][n];
+        }
+      }
+      i++;
+      break;
+    }
+
+    case REAL_VALUE : {
+      for (m = 0;m < nb_sequence;m++) {
+        for (n = 0;n < length[m];n++) {
+          real_sequence[m][k][n] = ireal_sequence[m][j][n];
+        }
+      }
+      j++;
+      break;
+    }
+
+    default :
+      break;
+    }
+  }
+
+  // min_value and max_value are already allocated (and zeroed) by init():
+  // allocating them a second time here would leak the first pair of arrays
+  for (i = 0;i < nb_variable;i++) {
+    min_value_computation(i);
+    max_value_computation(i);
+
+    switch (type[i]) {
+    case INT_VALUE :
+      build_marginal_frequency_distribution(i);
+      break;
+    case REAL_VALUE :
+      build_marginal_histogram(i);
+      break;
+    default :
+      break;
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the Sequences class.
+ *
+ * The integer-valued variables come first, followed by the real-valued ones.
+ *
+ * \param[in] inb_sequence number of sequences,
+ * \param[in] iidentifier sequence identifiers (default identifiers if empty),
+ * \param[in] ilength sequence lengths,
+ * \param[in] ivertex_identifier vertex identifiers of the associated MTG (may be empty),
+ * \param[in] iindex_param_type index parameter type (TIME/POSITION),
+ * \param[in] iindex_parameter index parameters (may be empty),
+ * \param[in] nb_int_variable number of integer-valued variables,
+ * \param[in] nb_real_variable number of real-valued variables,
+ * \param[in] iint_sequence integer-valued sequences,
+ * \param[in] ireal_sequence real-valued sequences.
+ */
+/*--------------------------------------------------------------*/
+
+Sequences::Sequences(int inb_sequence , const vector<int> &iidentifier , int *ilength ,
+                     const vector<vector<int> > &ivertex_identifier , index_parameter_type iindex_param_type ,
+                     const vector<vector<int> > &iindex_parameter , int nb_int_variable , int nb_real_variable ,
+                     const vector<vector<vector<int> > > &iint_sequence , const vector<vector<vector<double> > > &ireal_sequence)
+
+{
+  int i , j , k , m;
+  variable_nature *itype;
+
+
+  itype = new variable_nature[nb_int_variable + nb_real_variable];
+
+  i = 0;
+  for (j = 0;j < nb_int_variable;j++) {
+    itype[i++] = INT_VALUE;
+  }
+  for (j = 0;j < nb_real_variable;j++) {
+    itype[i++] = REAL_VALUE;
+  }
+
+  // init() sets default identifiers (1..nb_sequence) and no vertex identifier
+  init(inb_sequence , NULL , ilength , NULL , iindex_param_type ,
+       nb_int_variable + nb_real_variable , itype , false , false);
+  delete [] itype;
+
+  if (!iidentifier.empty()) {
+    for (i = 0;i < nb_sequence;i++) {
+      identifier[i] = iidentifier[i];
+    }
+  }
+
+  if (!ivertex_identifier.empty()) {
+    vertex_identifier = new int*[nb_sequence];
+    for (i = 0;i < nb_sequence;i++) {
+      vertex_identifier[i] = new int[length[i]];
+      for (j = 0;j < length[i];j++) {
+        vertex_identifier[i][j] = ivertex_identifier[i][j];
+      }
+    }
+  }
+
+  // init() leaves index_parameter NULL for an implicit index parameter:
+  // index parameters supplied in that case are ignored instead of being
+  // written through a null pointer
+  if ((index_parameter) && (!iindex_parameter.empty())) {
+    for (i = 0;i < nb_sequence;i++) {
+      for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) {
+        index_parameter[i][j] = iindex_parameter[i][j];
+      }
+    }
+
+    build_index_parameter_frequency_distribution();
+    index_interval_computation();
+  }
+
+  // k is the variable index in the object: integer-valued variables first
+  for (i = 0;i < nb_sequence;i++) {
+    for (j = 0;j < length[i];j++) {
+      k = 0;
+      for (m = 0;m < nb_int_variable;m++) {
+        int_sequence[i][k++][j] = iint_sequence[i][m][j];
+      }
+      for (m = 0;m < nb_real_variable;m++) {
+        real_sequence[i][k++][j] = ireal_sequence[i][m][j];
+      }
+    }
+  }
+
+  for (i = 0;i < nb_variable;i++) {
+    min_value_computation(i);
+    max_value_computation(i);
+
+    switch (type[i]) {
+    case INT_VALUE :
+      build_marginal_frequency_distribution(i);
+      break;
+    case REAL_VALUE :
+      build_marginal_histogram(i);
+      break;
+    default :
+      break;
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the Sequences class.
+ *
+ * \param[in] ilength_distribution sequence length frequency distribution,
+ * \param[in] inb_variable number of variables,
+ * \param[in] itype variable types,
+ * \param[in] init_flag flag initialization.
+ */
+/*--------------------------------------------------------------*/
+
+Sequences::Sequences(const FrequencyDistribution &ilength_distribution ,
+                     int inb_variable , variable_nature *itype , bool init_flag)
+
+{
+  nb_sequence = ilength_distribution.nb_element;
+
+  identifier = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    identifier[s] = s + 1;
+  }
+
+  // expand the frequency distribution: each length value is written
+  // as many times as its frequency, in increasing order of length
+  length = new int[nb_sequence];
+  int next = 0;
+  for (int value = ilength_distribution.offset; value < ilength_distribution.nb_value; value++) {
+    for (int count = 0; count < ilength_distribution.frequency[value]; count++) {
+      length[next++] = value;
+    }
+  }
+
+  max_length = ilength_distribution.nb_value - 1;
+  cumul_length_computation();
+  length_distribution = new FrequencyDistribution(ilength_distribution);
+
+  vertex_identifier = NULL;
+
+  // no explicit index parameter
+  index_param_type = IMPLICIT_TYPE;
+  index_parameter_distribution = NULL;
+  index_interval = NULL;
+  index_parameter = NULL;
+
+  nb_variable = inb_variable;
+
+  type = new variable_nature[nb_variable];
+  min_value = new double[nb_variable];
+  max_value = new double[nb_variable];
+  marginal_distribution = new FrequencyDistribution*[nb_variable];
+  marginal_histogram = new Histogram*[nb_variable];
+
+  if (itype) {
+    for (int v = 0; v < nb_variable; v++) {
+      type[v] = itype[v];
+    }
+  }
+
+  else {
+    // default: a state variable followed by integer-valued variables
+    type[0] = STATE;
+    for (int v = 1; v < nb_variable; v++) {
+      type[v] = INT_VALUE;
+    }
+  }
+
+  for (int v = 0; v < nb_variable; v++) {
+    min_value[v] = 0.;
+    max_value[v] = 0.;
+    marginal_distribution[v] = NULL;
+    marginal_histogram[v] = NULL;
+  }
+
+  // REAL_VALUE variables are stored in real_sequence, all the others in int_sequence
+  int_sequence = new int**[nb_sequence];
+  real_sequence = new double**[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    int_sequence[s] = new int*[nb_variable];
+    real_sequence[s] = new double*[nb_variable];
+
+    for (int v = 0; v < nb_variable; v++) {
+      if (type[v] == REAL_VALUE) {
+        int_sequence[s][v] = NULL;
+        real_sequence[s][v] = new double[length[s]];
+
+        if (init_flag) {
+          for (int t = 0; t < length[s]; t++) {
+            real_sequence[s][v][t] = 0.;
+          }
+        }
+      }
+
+      else {
+        int_sequence[s][v] = new int[length[s]];
+        real_sequence[s][v] = NULL;
+
+        if (init_flag) {
+          for (int t = 0; t < length[s]; t++) {
+            int_sequence[s][v][t] = 0;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a Sequences object from a RenewalData object.
+ *
+ * \param[in] timev reference on a RenewalData object.
+ */
+/*--------------------------------------------------------------*/
+
+Sequences::Sequences(const RenewalData &timev)
+
+{
+  nb_sequence = timev.nb_element;
+
+  identifier = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    identifier[s] = s + 1;
+  }
+
+  length = new int[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    length[s] = timev.length[s];
+  }
+
+  max_length_computation();
+  cumul_length_computation();
+  build_length_frequency_distribution();
+
+  vertex_identifier = NULL;
+
+  // no explicit index parameter
+  index_param_type = IMPLICIT_TYPE;
+  index_parameter_distribution = NULL;
+  index_interval = NULL;
+  index_parameter = NULL;
+
+  // a single integer-valued variable
+  nb_variable = 1;
+
+  type = new variable_nature[nb_variable];
+  type[0] = INT_VALUE;
+
+  min_value = new double[nb_variable];
+  max_value = new double[nb_variable];
+  marginal_distribution = new FrequencyDistribution*[nb_variable];
+  marginal_histogram = new Histogram*[nb_variable];
+
+  int_sequence = new int**[nb_sequence];
+  real_sequence = new double**[nb_sequence];
+  for (int s = 0; s < nb_sequence; s++) {
+    int_sequence[s] = new int*[nb_variable];
+    real_sequence[s] = new double*[nb_variable];
+
+    int_sequence[s][0] = new int[length[s]];
+    real_sequence[s][0] = NULL;
+
+    int *target = int_sequence[s][0];
+    for (int t = 0; t < length[s]; t++) {
+      target[t] = timev.sequence[s][t];
+    }
+  }
+
+  min_value_computation(0);
+  max_value_computation(0);
+  build_marginal_frequency_distribution(0);
+  marginal_histogram[0] = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the Sequences class.
+ * + * \param[in] seq reference on a Sequences object, + * \param[in] variable variable index, + * \param[in] itype selected variable type. + */ +/*--------------------------------------------------------------*/ + +Sequences::Sequences(const Sequences &seq , int variable , variable_nature itype) + +{ + int i , j , k; + int blength; + + + nb_sequence = seq.nb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[i]; + } + + max_length = seq.max_length; + cumul_length = seq.cumul_length; + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[i]; + } + + length_distribution = new FrequencyDistribution(*(seq.length_distribution)); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + vertex_identifier[i][j] = seq.vertex_identifier[i][j]; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = seq.index_param_type; + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + else { + index_parameter_distribution = NULL; + } + + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + else { + index_interval = NULL; + } + + if (seq.index_parameter) { + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + blength = (index_param_type == POSITION ? 
length[i] + 1 : length[i]); + index_parameter[i] = new int[blength]; + for (j = 0;j < blength;j++) { + index_parameter[i][j] = seq.index_parameter[i][j]; + } + } + } + + else { + index_parameter = NULL; + } + + nb_variable = seq.nb_variable; + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + for (i = 0;i < nb_variable;i++) { + if (i != variable) { + type[i] = seq.type[i]; + min_value[i] = seq.min_value[i]; + max_value[i] = seq.max_value[i]; + + if (seq.marginal_distribution[i]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[i])); + } + else { + marginal_distribution[i] = NULL; + } + + if (seq.marginal_histogram[i]) { + marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[i])); + } + else { + marginal_histogram[i] = NULL; + } + } + + else { + type[i] = itype; + min_value[i] = 0.; + max_value[i] = 0.; + marginal_distribution[i] = NULL; + marginal_histogram[i] = NULL; + } + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + for (j = 0;j < nb_variable;j++) { + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + int_sequence[i][j] = new int[length[i]]; + real_sequence[i][j] = NULL; + + if (j != variable) { + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = seq.int_sequence[i][j][k]; + } + } + } + + else { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + + if (j != variable) { + for (k = 0;k < length[i];k++) { + real_sequence[i][j][k] = seq.real_sequence[i][j][k]; + } + } + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the Sequences class. 
+ * + * \param[in] seq reference on a Sequences object, + * \param[in] inb_sequence number of sequences, + * \param[in] index selected sequence indices. + */ +/*--------------------------------------------------------------*/ + +Sequences::Sequences(const Sequences &seq , int inb_sequence , int *index) + +{ + int i , j , k; + int blength; + + + nb_sequence = inb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[index[i]]; + } + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[index[i]]; + } + + max_length_computation(); + cumul_length_computation(); + build_length_frequency_distribution(); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + vertex_identifier[i][j] = seq.vertex_identifier[index[i]][j]; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = seq.index_param_type; + +// if (index_param_type != IMPLICIT_TYPE) { + if (seq.index_parameter) { + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + blength = (index_param_type == POSITION ? 
length[i] + 1 : length[i]); + index_parameter[i] = new int[blength]; + for (j = 0;j < blength;j++) { + index_parameter[i][j] = seq.index_parameter[index[i]][j]; + } + } + + build_index_parameter_frequency_distribution(); + } + + else { + index_parameter_distribution = NULL; + index_interval = NULL; + index_parameter = NULL; + } + + nb_variable = seq.nb_variable; + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + for (i = 0;i < nb_variable;i++) { + type[i] = seq.type[i]; + marginal_distribution[i] = NULL; + marginal_histogram[i] = NULL; + } + + if (index_parameter) { + index_interval_computation(); + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + for (j = 0;j < nb_variable;j++) { + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + int_sequence[i][j] = new int[length[i]]; + real_sequence[i][j] = NULL; + + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = seq.int_sequence[index[i]][j][k]; + } + } + + else { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + + for (k = 0;k < length[i];k++) { + real_sequence[i][j][k] = seq.real_sequence[index[i]][j][k]; + } + } + } + } + + for (i = 0;i < nb_variable;i++) { + min_value_computation(i); + max_value_computation(i); + + if (type[i] != AUXILIARY) { + if (type[i] != REAL_VALUE) { + build_marginal_frequency_distribution(i); + } + else { + build_marginal_histogram(i , seq.marginal_histogram[i]->bin_width); + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a Sequences object adding auxiliary variables. 
+ * + * \param[in] seq reference on a Sequences object, + * \param[in] auxiliary flags on the addition of auxiliary variables. + */ +/*--------------------------------------------------------------*/ + +Sequences::Sequences(const Sequences &seq , bool *auxiliary) + +{ + int i , j , k , m; + int blength; + + + nb_sequence = seq.nb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[i]; + } + + max_length = seq.max_length; + cumul_length = seq.cumul_length; + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[i]; + } + + length_distribution = new FrequencyDistribution(*(seq.length_distribution)); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + vertex_identifier[i][j] = seq.vertex_identifier[i][j]; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = seq.index_param_type; + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + else { + index_parameter_distribution = NULL; + } + + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + else { + index_interval = NULL; + } + + if (seq.index_parameter) { + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + blength = (index_param_type == POSITION ? 
length[i] + 1 : length[i]); + index_parameter[i] = new int[blength]; + for (j = 0;j < blength;j++) { + index_parameter[i][j] = seq.index_parameter[i][j]; + } + } + } + + else { + index_parameter = NULL; + } + + nb_variable = seq.nb_variable; + for (i = 0;i < seq.nb_variable;i++) { + if (auxiliary[i]) { + nb_variable++; + } + } + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + i = 0; + for (j = 0;j < seq.nb_variable;j++) { + type[i] = seq.type[j]; + min_value[i] = seq.min_value[j]; + max_value[i] = seq.max_value[j]; + + if (seq.marginal_distribution[j]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[j])); + } + else { + marginal_distribution[i] = NULL; + } + + if (seq.marginal_histogram[j]) { + marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[j])); + } + else { + marginal_histogram[i] = NULL; + } + i++; + + if (auxiliary[j]) { + type[i] = AUXILIARY; + min_value[i] = 0.; + max_value[i] = 0.; + marginal_distribution[i] = NULL; + marginal_histogram[i] = NULL; + i++; + } + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + j = 0; + + for (k = 0;k < seq.nb_variable;k++) { + if (seq.type[k] != REAL_VALUE) { + int_sequence[i][j] = new int[length[i]]; + real_sequence[i][j] = NULL; + + for (m = 0;m < length[i];m++) { + int_sequence[i][j][m] = seq.int_sequence[i][k][m]; + } + } + + else { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + + for (m = 0;m < length[i];m++) { + real_sequence[i][j][m] = seq.real_sequence[i][k][m]; + } + } + + j++; + + if (auxiliary[k]) { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + 
j++; + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object. + * + * \param[in] seq reference on a Sequences object. + */ +/*--------------------------------------------------------------*/ + +void Sequences::copy(const Sequences &seq) + +{ + int i , j , k; + int blength; + + + nb_sequence = seq.nb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[i]; + } + + max_length = seq.max_length; + cumul_length = seq.cumul_length; + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[i]; + } + + length_distribution = new FrequencyDistribution(*(seq.length_distribution)); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + vertex_identifier[i][j] = seq.vertex_identifier[i][j]; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = seq.index_param_type; + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + else { + index_parameter_distribution = NULL; + } + + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + else { + index_interval = NULL; + } + + if (seq.index_parameter) { + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + blength = (index_param_type == POSITION ? 
length[i] + 1 : length[i]); + index_parameter[i] = new int[blength]; + for (j = 0;j < blength;j++) { + index_parameter[i][j] = seq.index_parameter[i][j]; + } + } + } + + else { + index_parameter = NULL; + } + + nb_variable = seq.nb_variable; + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + for (i = 0;i < nb_variable;i++) { + type[i] = seq.type[i]; + min_value[i] = seq.min_value[i]; + max_value[i] = seq.max_value[i]; + + if (seq.marginal_distribution[i]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[i])); + } + else { + marginal_distribution[i] = NULL; + } + + if (seq.marginal_histogram[i]) { + marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[i])); + } + else { + marginal_histogram[i] = NULL; + } + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + for (j = 0;j < nb_variable;j++) { + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + int_sequence[i][j] = new int[length[i]]; + real_sequence[i][j] = NULL; + + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = seq.int_sequence[i][j][k]; + } + } + + else { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + + for (k = 0;k < length[i];k++) { + real_sequence[i][j][k] = seq.real_sequence[i][j][k]; + } + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object reversing the direction of sequences. + * + * \param[in] seq reference on a Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::reverse(const Sequences &seq) + +{ + int i , j , k; + int blength , end_position , *pidentifier , *cidentifier , *pindex_param , + *cindex_param , *pisequence , *cisequence; + double *prsequence , *crsequence; + + + nb_sequence = seq.nb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[i]; + } + + max_length = seq.max_length; + cumul_length = seq.cumul_length; + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[i]; + } + + length_distribution = new FrequencyDistribution(*(seq.length_distribution)); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + + pidentifier = vertex_identifier[i]; + cidentifier = seq.vertex_identifier[i] + length[i] - 1; + for (j = 0;j < length[i];j++) { + *pidentifier++ = *cidentifier--; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = seq.index_param_type; + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + else { + index_parameter_distribution = NULL; + } + + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + else { + index_interval = NULL; + } + + if (seq.index_parameter) { + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + blength = (index_param_type == POSITION ? 
length[i] + 1 : length[i]); + index_parameter[i] = new int[blength]; + pindex_param = index_parameter[i]; + + if (index_param_type == POSITION) { + cindex_param = seq.index_parameter[i] + length[i]; + end_position = *cindex_param--; + for (j = 0;j < length[i];j++) { + *pindex_param++ = end_position - *cindex_param--; + } + *pindex_param = end_position; + } + + else if (index_param_type == TIME) { + cindex_param = seq.index_parameter[i] + length[i] - 1; + for (j = 0;j < length[i];j++) { + *pindex_param++ = index_parameter_distribution->nb_value - *cindex_param--; + } + } + + else { + cindex_param = seq.index_parameter[i] + length[i] - 1; + for (j = 0;j < length[i];j++) { + *pindex_param++ = *cindex_param--; + } + } + } + } + + else { + index_parameter = NULL; + } + + nb_variable = seq.nb_variable; + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + for (i = 0;i < nb_variable;i++) { + type[i] = seq.type[i]; + min_value[i] = seq.min_value[i]; + max_value[i] = seq.max_value[i]; + + if (seq.marginal_distribution[i]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[i])); + } + else { + marginal_distribution[i] = NULL; + } + + if (seq.marginal_histogram[i]) { + marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[i])); + } + else { + marginal_histogram[i] = NULL; + } + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + for (j = 0;j < nb_variable;j++) { + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + int_sequence[i][j] = new int[length[i]]; + real_sequence[i][j] = NULL; + + pisequence = int_sequence[i][j]; + cisequence = seq.int_sequence[i][j] + length[i] - 1; 
+ for (k = 0;k < length[i];k++) { + *pisequence++ = *cisequence--; + } + } + + else { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + + prsequence = real_sequence[i][j]; + crsequence = seq.real_sequence[i][j] + length[i] - 1; + for (k = 0;k < length[i];k++) { + *prsequence++ = *crsequence--; + } + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object adding a state variable. + * + * \param[in] seq reference on a Sequences object. + */ +/*--------------------------------------------------------------*/ + +void Sequences::add_state_variable(const Sequences &seq) + +{ + int i , j , k; + int blength; + + + nb_sequence = seq.nb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[i]; + } + + max_length = seq.max_length; + cumul_length = seq.cumul_length; + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[i]; + } + + length_distribution = new FrequencyDistribution(*(seq.length_distribution)); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + vertex_identifier[i][j] = seq.vertex_identifier[i][j]; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = seq.index_param_type; + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + else { + index_parameter_distribution = NULL; + } + + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + else { + index_interval = NULL; + } + + if (seq.index_parameter) { + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + blength = (index_param_type == POSITION ? 
length[i] + 1 : length[i]); + index_parameter[i] = new int[blength]; + for (j = 0;j < blength;j++) { + index_parameter[i][j] = seq.index_parameter[i][j]; + } + } + } + + else { + index_parameter = NULL; + } + + nb_variable = seq.nb_variable + 1; + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + type[0] = STATE; + min_value[0] = 0.; + max_value[0] = 0.; + marginal_distribution[0] = NULL; + marginal_histogram[0] = NULL; + + for (i = 0;i < seq.nb_variable;i++) { + type[i + 1] = (seq.type[i] == STATE ? INT_VALUE : seq.type[i]); + min_value[i + 1] = seq.min_value[i]; + max_value[i + 1] = seq.max_value[i]; + + if (seq.marginal_distribution[i]) { + marginal_distribution[i + 1] = new FrequencyDistribution(*(seq.marginal_distribution[i])); + } + else { + marginal_distribution[i + 1] = NULL; + } + + if (seq.marginal_histogram[i]) { + marginal_histogram[i + 1] = new Histogram(*(seq.marginal_histogram[i])); + } + else { + marginal_histogram[i + 1] = NULL; + } + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + + int_sequence[i][0] = new int[length[i]]; + real_sequence[i][0] = NULL; + + for (j = 0;j < length[i];j++) { + int_sequence[i][0][j] = 0; + } + + for (j = 0;j < seq.nb_variable;j++) { + if (seq.type[j] != REAL_VALUE) { + int_sequence[i][j + 1] = new int[length[i]]; + real_sequence[i][j + 1] = NULL; + + for (k = 0;k < length[i];k++) { + int_sequence[i][j + 1][k] = seq.int_sequence[i][j][k]; + } + } + + else { + int_sequence[i][j + 1] = NULL; + real_sequence[i][j + 1] = new double[length[i]]; + + for (k = 0;k < length[i];k++) { + real_sequence[i][j + 1][k] = seq.real_sequence[i][j][k]; + } + } + } + } +} + 
+/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object transforming some given existing + * variable into an index parameter + * + * \param[in] error reference on a StatError object, + * \param[in] ivariable variable index (int) + * \param[in] index_param_type type of index parameter + * + * \return Sequences* object. */ +/*--------------------------------------------------------------*/ +Sequences* Sequences::set_variable_as_index_parameter(stat_tool::StatError &error, + int ivariable, + index_parameter_type index_param_type) const +{ + bool status = true; + int i, j, s; + int **iindex_parameter = NULL; + int ***iint_sequence = NULL;; + double ***ireal_sequence = NULL;; + Sequences *seq = NULL; + const int inb_sequence = this->nb_sequence; + const int inb_variable = this->nb_variable; + stat_tool::variable_nature *itype = NULL; + ostringstream error_message, correction_message; + + error.init(); + + if ((ivariable < 1) || (ivariable > nb_variable)) { + status = false; + error_message << ivariable << ": " << STAT_error[STATR_VARIABLE_INDEX]; + error.update((error_message.str()).c_str()); + } + if ((status) && (type[ivariable-1] != INT_VALUE)) { + status = false; + error_message << ivariable << ": " << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (status) { + itype = new stat_tool::variable_nature[inb_variable-1]; + j = 0; // indices of variables in seq + iint_sequence = new int**[inb_sequence]; + ireal_sequence = new double**[inb_sequence]; + for (s = 0; s < inb_sequence; s++) { + iint_sequence[s] = new int*[inb_variable-1]; + ireal_sequence[s] = new double*[inb_variable-1]; + } + for (i = 0; i < inb_variable; i++) { // indices of variables in *this + if (i != ivariable-1) { + itype[j] = this->type[i]; + for (s = 0; s < inb_sequence; s++) { + if (itype[j] == INT_VALUE) { + iint_sequence[s][j] = this->int_sequence[s][i]; + ireal_sequence[s][j] = NULL; + } else { + 
ireal_sequence[s][j] = this->real_sequence[s][i]; + iint_sequence[s][j] = NULL; + } + } + j++; + } + } + iindex_parameter = new int*[inb_sequence]; + for (s = 0;s < inb_sequence;s++) { + iindex_parameter[s] = new int[index_param_type != POSITION ? this->length[s] : this->length[s]+1]; + iindex_parameter[s][0] = this->int_sequence[s][ivariable-1][0]; + for (j = 1;j < this->length[s];j++) { + iindex_parameter[s][j] = this->int_sequence[s][ivariable-1][j]; + if (iindex_parameter[s][j] < iindex_parameter[s][j-1]) { + status = false; + // if (error.get_nb_error() < error.get_max_nb_error())) { + if (error.get_nb_error() == 0) { + correction_message << " - " ; + correction_message << SEQ_label[SEQL_SEQUENCE] << " " << s << " - "; + correction_message << SEQ_label[SEQL_POSITION] << " " << j << " - "; + correction_message << SEQ_label[SEQL_INDEX] << " " << iindex_parameter[s][j] << " is less than previous " ; + correction_message << SEQ_label[SEQL_INDEX] << " " << iindex_parameter[s][j-1] << endl; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER], (correction_message.str()).c_str()); + } + } + } + if (index_param_type == POSITION ) + iindex_parameter[s][this->length[s]] = iindex_parameter[s][this->length[s]-1] + 1; + } + if (status) + seq = new Sequences(inb_sequence , this->identifier , this->length , + this->vertex_identifier, index_param_type, iindex_parameter , + this->nb_variable-1, itype , iint_sequence , ireal_sequence); + for (s = 0;s < inb_sequence;s++) { + delete [] iindex_parameter[s]; + iindex_parameter[s] = NULL; + delete [] iint_sequence[s]; + delete [] ireal_sequence[s]; + iint_sequence[s] = NULL; + ireal_sequence[s] = NULL; + } + delete [] itype; + itype = NULL; + delete [] iindex_parameter; + iindex_parameter = NULL; + delete [] iint_sequence; + iint_sequence = NULL; + delete [] ireal_sequence; + ireal_sequence = NULL; + + } + + return seq; + +} +/*--------------------------------------------------------------*/ +/** + * \brief Add index parameter 
+ * + * \param[in] error reference on a StatError object, + * \param[in] index_parameter index values (int**) + * \param[in] index_param_type type of index parameter + * + * \return Sequences* object. */ +/*--------------------------------------------------------------*/ +void Sequences::set_index_parameter(stat_tool::StatError &error, int **iindex_parameter, + index_parameter_type iindex_param_type) +{ + bool status = true; + int j, s; + ostringstream correction_message; + + error.init(); + + for (s = 0;s < nb_sequence;s++) { + for (j = 1;j < (iindex_param_type == POSITION ? length[s]+1 : length[s]);j++) { + if (iindex_parameter[s][j] < iindex_parameter[s][j-1]) { + status = false; + if (error.get_nb_error() == 0) { + correction_message << " - " ; + correction_message << SEQ_label[SEQL_SEQUENCE] << " " << s << " - "; + correction_message << SEQ_label[SEQL_POSITION] << " " << j << " - "; + correction_message << SEQ_label[SEQL_INDEX] << " " << iindex_parameter[s][j] << " is less than previous " ; + correction_message << SEQ_label[SEQL_INDEX] << " " << iindex_parameter[s][j-1] << endl; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER], (correction_message.str()).c_str()); + } + } + } + } + + if (status) { + index_param_type = iindex_param_type; + if (index_parameter != NULL) { + for (s = 0; s < nb_sequence; s++){ + delete [] index_parameter[s]; + index_parameter[s] = NULL; + } + delete [] index_parameter; + index_parameter; + } + index_parameter = new int*[nb_sequence]; + for (s = 0;s < nb_sequence;s++) { + index_parameter[s] = new int[iindex_param_type == POSITION ? length[s]+1 : length[s]]; + for (j = 0;j < (iindex_param_type == POSITION ? 
length[s]+1 : length[s]);j++) { + index_parameter[s][j] = iindex_parameter[s][j]; + } + } + if (index_parameter_distribution != NULL) { + delete index_parameter_distribution; + index_parameter_distribution = NULL; + } + if (index_interval != NULL) { + delete index_interval; + index_interval = NULL; + } + + build_index_parameter_frequency_distribution(); + + if ((index_param_type == TIME) || (index_param_type == POSITION)) { + index_interval_computation(); + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object transforming the implicit index parameters in + * explicit index parameters. + * + * \param[in] seq reference on a Sequences object. + */ +/*--------------------------------------------------------------*/ + +void Sequences::explicit_index_parameter(const Sequences &seq) + +{ + int i , j; + + + Sequences::copy(seq); + + index_param_type = TIME; + + index_parameter = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + index_parameter[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + index_parameter[i][j] = j; + } + } + + build_index_parameter_frequency_distribution(); + index_interval_computation(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object removing the index parameters. + * + * \param[in] seq reference on a Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::remove_index_parameter(const Sequences &seq) + +{ + int i , j , k; + + + nb_sequence = seq.nb_sequence; + + identifier = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + identifier[i] = seq.identifier[i]; + } + + max_length = seq.max_length; + cumul_length = seq.cumul_length; + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + length[i] = seq.length[i]; + } + + length_distribution = new FrequencyDistribution(*(seq.length_distribution)); + + if (seq.vertex_identifier) { + vertex_identifier = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + vertex_identifier[i] = new int[length[i]]; + for (j = 0;j < length[i];j++) { + vertex_identifier[i][j] = seq.vertex_identifier[i][j]; + } + } + } + + else { + vertex_identifier = NULL; + } + + index_param_type = IMPLICIT_TYPE; + index_parameter_distribution = NULL; + index_interval = NULL; + index_parameter = NULL; + + nb_variable = seq.nb_variable; + + type = new variable_nature[nb_variable]; + min_value = new double[nb_variable]; + max_value = new double[nb_variable]; + marginal_distribution = new FrequencyDistribution*[nb_variable]; + marginal_histogram = new Histogram*[nb_variable]; + + for (i = 0;i < nb_variable;i++) { + type[i] = seq.type[i]; + min_value[i] = seq.min_value[i]; + max_value[i] = seq.max_value[i]; + + if (seq.marginal_distribution[i]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[i])); + } + else { + marginal_distribution[i] = NULL; + } + + if (seq.marginal_histogram[i]) { + marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[i])); + } + else { + marginal_histogram[i] = NULL; + } + } + + int_sequence = new int**[nb_sequence]; + real_sequence = new double**[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_sequence[i] = new int*[nb_variable]; + real_sequence[i] = new double*[nb_variable]; + for (j = 0;j < nb_variable;j++) { 
+ if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + int_sequence[i][j] = new int[length[i]]; + real_sequence[i][j] = NULL; + + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = seq.int_sequence[i][j][k]; + } + } + + else { + int_sequence[i][j] = NULL; + real_sequence[i][j] = new double[length[i]]; + + for (k = 0;k < length[i];k++) { + real_sequence[i][j][k] = seq.real_sequence[i][j][k]; + } + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor by copy of the Sequences class. + * + * \param[in] seq reference on a Sequences object, + * \param[in] transform type of transform. + */ +/*--------------------------------------------------------------*/ + +Sequences::Sequences(const Sequences &seq , sequence_transformation transform) + +{ + switch (transform) { + case REVERSE : + Sequences::reverse(seq); + break; + case ADD_STATE_VARIABLE : + Sequences::add_state_variable(seq); + break; + case EXPLICIT_INDEX_PARAMETER : + Sequences::explicit_index_parameter(seq); + break; + case REMOVE_INDEX_PARAMETER : + Sequences::remove_index_parameter(seq); + break; + default : + Sequences::copy(seq); + break; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of the data members of a Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::remove() + +{ + int i , j; + + + delete [] identifier; + + delete [] length; + delete length_distribution; + + if (vertex_identifier) { + for (i = 0;i < nb_sequence;i++) { + delete [] vertex_identifier[i]; + } + delete [] vertex_identifier; + } + + delete index_parameter_distribution; + delete index_interval; + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + delete [] index_parameter[i]; + } + delete [] index_parameter; + } + + delete [] type; + delete [] min_value; + delete [] max_value; + + if (marginal_distribution) { + for (i = 0;i < nb_variable;i++) { + delete marginal_distribution[i]; + } + delete [] marginal_distribution; + } + + if (marginal_histogram) { + for (i = 0;i < nb_variable;i++) { + delete marginal_histogram[i]; + } + delete [] marginal_histogram; + } + + if (int_sequence) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + delete [] int_sequence[i][j]; + } + delete [] int_sequence[i]; + } + delete [] int_sequence; + } + + if (real_sequence) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + delete [] real_sequence[i][j]; + } + delete [] real_sequence[i]; + } + delete [] real_sequence; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the Sequences class. + */ +/*--------------------------------------------------------------*/ + +Sequences::~Sequences() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the Sequences class. + * + * \param[in] seq reference on a Sequences object. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences& Sequences::operator=(const Sequences &seq) + +{ + if (&seq != this) { + remove(); + copy(seq); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the marginal frequency distribution for a positive integer-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index. + * + * \return DiscreteDistributionData object. + */ +/*--------------------------------------------------------------*/ + +DiscreteDistributionData* Sequences::extract(StatError &error , int variable) const + +{ + bool status = true; + DiscreteDistributionData *histo; + + + histo = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else if (!marginal_distribution[variable]) { + status = false; + error.update(STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]); + } + } + + if (status) { + histo = new DiscreteDistributionData(*marginal_distribution[variable]); + } + + return histo; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a Vectors object from a Sequences object. + * + * \param[in] index_variable flag index parameter variable. + * + * \return Vectors object. 
+ */ +/*--------------------------------------------------------------*/ + +Vectors* Sequences::build_vectors(bool index_variable) const + +{ + int i , j , k , m; + int offset , **int_vector; + variable_nature *itype; + double **real_vector; + Vectors *vec; + + + if (index_parameter) { + index_variable = true; + } + offset = (index_variable ? 1 : 0); + + itype = new variable_nature[nb_variable + offset]; + + if (index_variable) { + itype[0] = INT_VALUE; + } + + for (i = 0;i < nb_variable;i++) { + switch (type[i]) { + case STATE : + itype[i + offset] = INT_VALUE; + break; + case AUXILIARY : + itype[i + offset] = REAL_VALUE; + break; + default : + itype[i + offset] = type[i]; + break; + } + } + + int_vector = new int*[cumul_length]; + for (i = 0;i < cumul_length;i++) { + int_vector[i] = new int[nb_variable + offset]; + } + + real_vector = new double*[cumul_length]; + for (i = 0;i < cumul_length;i++) { + real_vector[i] = new double[nb_variable + offset]; + } + + i = 0; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + if (index_variable) { + if (index_parameter) { + int_vector[i][0] = index_parameter[j][k]; + } + else { + int_vector[i][0] = k; + } + } + + for (m = 0;m < nb_variable;m++) { + if ((type[m] != REAL_VALUE) && (type[m] != AUXILIARY)) { + int_vector[i][m + offset] = int_sequence[j][m][k]; + } + else { + real_vector[i][m + offset] = real_sequence[j][m][k]; + } + } + + i++; + } + } + + vec = new Vectors(cumul_length , NULL , nb_variable + offset , itype , int_vector , real_vector); + delete [] itype; + + for (i = 0;i < cumul_length;i++) { + delete [] int_vector[i]; + } + delete [] int_vector; + + for (i = 0;i < cumul_length;i++) { + delete [] real_vector[i]; + } + delete [] real_vector; + + return vec; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of global measures (length, time to the 1st occurrence of a value, + * number of runs or occurrences of a value, mean, cumulative value) 
for each sequence. + * + * \param[in] error reference on a StatError object, + * \param[in] pattern measure type, + * \param[in] variable variable index, + * \param[in] value value. + * + * \return Vectors object. + */ +/*--------------------------------------------------------------*/ + +Vectors* Sequences::extract_vectors(StatError &error , sequence_pattern pattern , + int variable , int value) const + +{ + bool status = true; + int i , j; + int begin_run , count , **int_vector; + variable_nature itype[1]; + double **real_vector; + Vectors *vec; + + + vec = NULL; + error.init(); + + if (variable != I_DEFAULT) { + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((pattern == SEQUENCE_CUMUL) || (pattern == SEQUENCE_MEAN)) { + if ((type[variable] != INT_VALUE) && (type[variable] != STATE) && + (type[variable] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE] + << " or " << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + +// else if ((pattern == FIRST_OCCURRENCE) || (pattern == NB_RUN) || +// (pattern == NB_OCCURRENCE)) { + else { + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + if ((value < min_value[variable]) || (value > 
max_value[variable]) || + ((marginal_distribution[variable]) && (marginal_distribution[variable]->frequency[value] == 0))) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VALUE] << " " << value << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (status) { + switch (pattern) { + case SEQUENCE_CUMUL : + itype[0] = type[variable]; + break; + case SEQUENCE_MEAN : + itype[0] = REAL_VALUE; + break; + default : + itype[0] = INT_VALUE; + break; + } + + int_vector = new int*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + int_vector[i] = new int[1]; + } + + real_vector = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + real_vector[i] = new double[1]; + } + + switch (pattern) { + + case LENGTH_PATTERN : { + if (index_param_type == POSITION) { + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = index_parameter[i][length[i]]; + } + } + + else if ((index_param_type == TIME) && (index_interval->variance > 0.)) { + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = index_parameter[i][length[i] - 1]; + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = length[i]; + } + } + break; + } + + case SEQUENCE_CUMUL : { + if (type[variable] != REAL_VALUE) { + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = 0; + for (j = 0;j < length[i];j++) { + int_vector[i][0] += int_sequence[i][variable][j]; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + real_vector[i][0] = 0.; + for (j = 0;j < length[i];j++) { + real_vector[i][0] += real_sequence[i][variable][j]; + } + } + } + break; + } + + case SEQUENCE_MEAN : { + if (type[variable] != REAL_VALUE) { + for (i = 0;i < nb_sequence;i++) { + real_vector[i][0] = 0.; + for (j = 0;j < length[i];j++) { + real_vector[i][0] += int_sequence[i][variable][j]; + } + real_vector[i][0] /= length[i]; + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + real_vector[i][0] = 0.; + for (j = 0;j < 
length[i];j++) { + real_vector[i][0] += real_sequence[i][variable][j]; + } + real_vector[i][0] /= length[i]; + } + } + break; + } + + case FIRST_OCCURRENCE_PATTERN : { + if (index_param_type != IMPLICIT_TYPE) { + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = -1; + for (j = 0;j < length[i];j++) { + if (int_sequence[i][variable][j] == value) { + int_vector[i][0] = index_parameter[i][j]; + break; + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = -1; + for (j = 0;j < length[i];j++) { + if (int_sequence[i][variable][j] == value) { + int_vector[i][0] = j; + break; + } + } + } + } + break; + } + + case SOJOURN_TIME_PATTERN : { + if ((index_param_type == TIME) && (index_interval->variance > 0.)) { // for the mango growth follow-ups + for (i = 0;i < nb_sequence;i++) { + int_vector[i][0] = -1; + if (int_sequence[i][variable][0] == value) { + begin_run = 0; + } + + for (j = 0;j < length[i] - 1;j++) { + if (int_sequence[i][variable][j + 1] != int_sequence[i][variable][j]) { + if (int_sequence[i][variable][j + 1] == value) { + begin_run = index_parameter[i][j + 1]; + } + else if (int_sequence[i][variable][j] == value) { + int_vector[i][0] = index_parameter[i][j + 1] - begin_run - 1; + break; + } + } + } + + if ((j == length[i] - 1) && (int_sequence[i][variable][length[i] - 1] == value)) { + int_vector[i][0] = index_parameter[i][j] - begin_run; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { +// int_vector[i][0] = -1; + int_vector[i][0] = 0; + if (int_sequence[i][variable][0] == value) { + begin_run = 0; + } + + for (j = 0;j < length[i] - 1;j++) { + if (int_sequence[i][variable][j + 1] != int_sequence[i][variable][j]) { + if (int_sequence[i][variable][j + 1] == value) { + begin_run = j + 1; + } + else if (int_sequence[i][variable][j] == value) { + int_vector[i][0] = j + 1 - begin_run; + break; + } + } + } + + if ((j == length[i] - 1) && (int_sequence[i][variable][length[i] - 1] == value)) { + int_vector[i][0] = length[i] - 
begin_run; + } + } + } + break; + } + + case NB_RUN_PATTERN : { + for (i = 0;i < nb_sequence;i++) { + count = 0; + if (int_sequence[i][variable][0] == value) { + count++; + } + for (j = 1;j < length[i];j++) { + if ((int_sequence[i][variable][j] != int_sequence[i][variable][j - 1]) && + (int_sequence[i][variable][j] == value)) { + count++; + } + } + + int_vector[i][0] = count; + } + break; + } + + case NB_OCCURRENCE_PATTERN : { + for (i = 0;i < nb_sequence;i++) { + count = 0; + for (j = 0;j < length[i];j++) { + if (int_sequence[i][variable][j] == value) { + count++; + } + } + + int_vector[i][0] = count; + } + break; + } + } + + vec = new Vectors(nb_sequence , identifier , 1 , itype , int_vector , real_vector); + + for (i = 0;i < nb_sequence;i++) { + delete [] int_vector[i]; + } + delete [] int_vector; + + for (i = 0;i < nb_sequence;i++) { + delete [] real_vector[i]; + } + delete [] real_vector; + } + + return vec; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a MarkovianSequences object from a Sequences object. + * + * \param[in] error reference on a StatError object. + * + * \return MarkovianSequences object. 
+ */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* Sequences::markovian_sequences(StatError &error) const + +{ + bool status = true; + int i; + MarkovianSequences *seq; + + + seq = NULL; + error.init(); + +// if (((index_param_type == TIME) && (index_interval->variance > 0.)) || +// (index_param_type == POSITION)) { + if (index_param_type == POSITION) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (max_value[i] == min_value[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_NB_VALUE]; + error.update((error_message.str()).c_str()); + } + + if ((type[i] == INT_VALUE) || (type[i] == STATE)) { + if (min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + + if (!marginal_distribution[i]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + seq = new MarkovianSequences(*this); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief 
Checking of (strictly) increasing index parameters within sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] strict flag stricty increasing or not, + * \param[in] pattern_label label. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::increasing_index_parameter_checking(StatError &error , bool strict , + const char *pattern_label) const + +{ + bool status = true; + int i , j; + + + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) { + if ((((!strict) || (j == length[i])) && (index_parameter[i][j] < index_parameter[i][j - 1])) || + ((strict) && (j < length[i]) && (index_parameter[i][j] <= index_parameter[i][j - 1]))) { + status = false; + ostringstream error_message; + error_message << pattern_label << " " << i + 1 << ": " + << (index_param_type == TIME ? SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) << " " + << index_parameter[i][j] << " " << STAT_error[STATR_NOT_ALLOWED]; + error.update((error_message.str()).c_str()); + } + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Checking of (strictly) increasing sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] strict flag stricty increasing or not, + * \param[in] pattern_label pattern label, + * \param[in] variable_label variable label. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Sequences::increasing_sequence_checking(StatError &error , int variable , bool strict , + const char *pattern_label , const char *variable_label) const + +{ + bool status = true; + int i , j; + + + switch (type[variable]) { + + case INT_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < length[i];j++) { + if (((!strict) && (int_sequence[i][variable][j] < int_sequence[i][variable][j - 1])) || + ((strict) && (int_sequence[i][variable][j] <= int_sequence[i][variable][j - 1]))) { + status = false; + ostringstream error_message; + error_message << pattern_label << " " << i + 1 << ": " << STAT_label[STATL_VARIABLE] << " " + << variable + 1 << ": " << variable_label << " " + << int_sequence[i][variable][j] << " " << STAT_error[STATR_NOT_ALLOWED]; + error.update((error_message.str()).c_str()); + } + } + } + break; + } + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < length[i];j++) { + if (((!strict) && (real_sequence[i][variable][j] < real_sequence[i][variable][j - 1])) || + ((strict) && (real_sequence[i][variable][j] <= real_sequence[i][variable][j - 1]))) { + status = false; + ostringstream error_message; + error_message << pattern_label << " " << i + 1 << ": " << STAT_label[STATL_VARIABLE] << " " + << variable + 1 << ": " << variable_label << " " + << real_sequence[i][variable][j] << " " << STAT_error[STATR_NOT_ALLOWED]; + error.update((error_message.str()).c_str()); + } + } + } + break; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Checking of a Sequences object. + * + * \param[in] error reference on a StatError object, + * \param[in] pattern_label label. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Sequences::check(StatError &error , const char *pattern_label) + +{ + bool status = true , lstatus; + + + error.init(); + + if (nb_variable > SEQUENCE_NB_VARIABLE) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + + if (max_length == 1) { + status = false; + error.update(SEQ_parsing[SEQP_MAX_SEQUENCE_LENGTH]); + } + + lstatus = identifier_checking(error , nb_sequence , identifier); + if (!lstatus) { + status = false; + } + + if (index_param_type != IMPLICIT_TYPE) { + lstatus = increasing_index_parameter_checking(error , (index_param_type == POSITION ? false : true) , + pattern_label); + + if (!lstatus) { + status = false; + } + } + + if (status) { + if (index_parameter) { + build_index_parameter_frequency_distribution(); + } +// if ((index_param_type == TIME) || ((index_param_type == POSITION) && +// (type[0] != NB_INTERNODE))) { + if ((index_param_type == TIME) || (index_param_type == POSITION)) { + index_interval_computation(); + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a TimeEvents object from a Sequences object. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] begin_date begin date, + * \param[in] end_date end date, + * \param[in] previous_date previous date, + * \param[in] next_date next date. + * + * \return TimeEvents object. 
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents* Sequences::extract_time_events(StatError &error , int variable , + int begin_date , int end_date , + int previous_date , int next_date) const + +{ + bool status = true , lstatus; + int i , j; + int nb_element , previous , begin , end , next , *time , *nb_event , *pdate; + TimeEvents *timev; + + + timev = NULL; + error.init(); + + if (index_param_type != TIME) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[TIME]); + } + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + lstatus = increasing_sequence_checking(error , variable , false , SEQ_label[SEQL_SEQUENCE] , + STAT_label[STATL_VALUE]); + if (!lstatus) { + status = false; + } + } + } + + if (begin_date >= end_date) { + status = false; + error.update(SEQ_error[SEQR_DATE_ORDER]); + } + + if (previous_date != I_DEFAULT) { + if (previous_date > begin_date) { + status = false; + error.update(SEQ_error[SEQR_DATE_ORDER]); + } + } + else { + previous_date = begin_date; + } + + if (next_date != I_DEFAULT) { + if (next_date < end_date) { + status = false; + error.update(SEQ_error[SEQR_DATE_ORDER]); + } + } + else { + next_date = end_date; + } + + if (status) { + time = new int[nb_sequence]; + nb_event = new int[nb_sequence]; + nb_element = 0; + + for (i = 0;i < nb_sequence;i++) { + pdate = index_parameter[i]; + previous = I_DEFAULT; + begin = I_DEFAULT; + end = I_DEFAULT; + next = I_DEFAULT; + + for (j = 0;j < length[i];j++) { + if (*pdate == 
previous_date) { + previous = j; + } + if (*pdate == begin_date) { + begin = j; + } + if (*pdate == end_date) { + end = j; + } + if (*pdate == next_date) { + next = j; + break; + } + pdate++; + } + + if ((previous != I_DEFAULT) && (begin != I_DEFAULT) && (end != I_DEFAULT) && + (next != I_DEFAULT) && ((previous == begin) || ((previous < begin) && + (int_sequence[i][variable][previous] < int_sequence[i][variable][begin]))) && ((end == next) || + ((end < next) && (int_sequence[i][variable][end] < int_sequence[i][variable][next])))) { + time[nb_element] = end_date - begin_date; + nb_event[nb_element++] = int_sequence[i][variable][end] - int_sequence[i][variable][begin]; + } + } + + if (nb_element == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + else { + timev = new TimeEvents(nb_element , time , nb_event); + } + + delete [] time; + delete [] nb_event; + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a RenewalData object from a Sequences object. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] begin_index_parameter begin index parameter, + * \param[in] end_index_parameter end index parameter. + * + * \return RenewalData object. 
+ */ +/*--------------------------------------------------------------*/ + +RenewalData* Sequences::extract_renewal_data(StatError &error , int variable , + int begin_index_parameter , int end_index_parameter) const + +{ + bool status = true , lstatus; + int i , j; + int nb_element , index , *ptime , *pnb_event , *pisequence , *cisequence; + RenewalData *timev; + + + timev = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + lstatus = increasing_sequence_checking(error , variable , false , SEQ_label[SEQL_SEQUENCE] , + STAT_label[STATL_VALUE]); + if (!lstatus) { + status = false; + } + } + } + + if ((begin_index_parameter < 0) || (begin_index_parameter + 1 >= max_length) || + (begin_index_parameter > end_index_parameter)) { + status = false; + error.update(SEQ_error[SEQR_BEGIN_INDEX_PARAMETER]); + } + if ((end_index_parameter < 0) || (end_index_parameter + 1 >= max_length) || + (end_index_parameter < begin_index_parameter)) { + status = false; + error.update(SEQ_error[SEQR_END_INDEX_PARAMETER]); + } + + if (status) { + timev = new RenewalData(nb_sequence , end_index_parameter + 1 - begin_index_parameter); + + ptime = new int[nb_sequence]; + pnb_event = new int[nb_sequence]; + + nb_element = 0; + for (i = 0;i < nb_sequence;i++) { + if (end_index_parameter + 1 < length[i]) { + *ptime++ = end_index_parameter + 1 - begin_index_parameter; + *pnb_event++ = int_sequence[i][variable][end_index_parameter + 1] - + int_sequence[i][variable][begin_index_parameter]; + + timev->length[nb_element] = end_index_parameter + 1 - 
begin_index_parameter; + timev->sequence[nb_element] = new int[timev->length[nb_element]]; + + pisequence = timev->sequence[nb_element++]; + cisequence = int_sequence[i][variable] + begin_index_parameter; + index = begin_index_parameter; + for (j = begin_index_parameter + 1;j <= end_index_parameter + 1;j++) { + *pisequence = *(cisequence + 1) - *cisequence; + cisequence++; + + if (*pisequence > 0) { + if (index == begin_index_parameter) { + (timev->forward->frequency[j - index])++; + } + else { + (timev->within->frequency[j - index])++; + } + index = j; + } + pisequence++; + } + + if (index > begin_index_parameter) { + (timev->backward->frequency[end_index_parameter + 1 - index])++; + } + } + } + + // construction of the triplets {observation period, number of events, frequency}, of + // the observation period frequency distribution and the number of events frequency distributions + + ptime -= nb_element; + pnb_event -= nb_element; + + timev->build(nb_element , ptime , pnb_event); + delete [] ptime; + delete [] pnb_event; + + // extraction of the characteristics of the inter-event frequency distribution, + // the frequency distribution of time intervals between events within the observation period, + // the backward and forward recurrence time frequency distributions, + + timev->within->nb_value_computation(); + timev->within->offset_computation(); + timev->within->nb_element_computation(); + timev->within->max_computation(); + timev->within->mean_computation(); + timev->within->variance_computation(); + + timev->backward->nb_value_computation(); + timev->backward->offset_computation(); + timev->backward->nb_element_computation(); + timev->backward->max_computation(); + timev->backward->mean_computation(); + timev->backward->variance_computation(); + + timev->forward->nb_value_computation(); + timev->forward->offset_computation(); + timev->forward->nb_element_computation(); + timev->forward->max_computation(); + timev->forward->mean_computation(); + 
timev->forward->variance_computation(); + + timev->build_index_event(1); + + if ((timev->backward->nb_element == 0) && (timev->forward->nb_element == 0)) { + delete timev; + timev = NULL; + error.update(SEQ_error[SEQR_BOTH_END_CENSORED_INTERVAL]); + } + } + + return timev; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/sequences2.cpp b/src/cpp/sequence_analysis/sequences2.cpp new file mode 100644 index 0000000..637258c --- /dev/null +++ b/src/cpp/sequence_analysis/sequences2.cpp @@ -0,0 +1,8317 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "stat_tool/stat_label.h" + +#include "stat_tool/quantile_computation.hpp" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Merging of Sequences objects. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sample number of Sequences objects, + * \param[in] iseq pointer on the Sequences objects. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::merge(StatError &error , int nb_sample , const Sequences **iseq) const + +{ + bool status = true; + int i , j , k , m , n , p , q; + int inb_sequence , cumul_nb_sequence , *ilength , *iidentifier , **ivertex_identifier; + const FrequencyDistribution **phisto; + Sequences *seq; + const Sequences **pseq; + + + seq = NULL; + error.init(); + + for (i = 0;i < nb_sample;i++) { + if (iseq[i]->index_param_type != index_param_type) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_error[SEQR_INDEX_PARAMETER_TYPE]; + + if (index_param_type == IMPLICIT_TYPE) { + error.update((error_message.str()).c_str()); + } + else { + error.correction_update((error_message.str()).c_str() , SEQ_index_parameter_word[index_param_type]); + } + } + } + + for (i = 0;i < nb_sample;i++) { + if (iseq[i]->nb_variable != nb_variable) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << STAT_error[STATR_NB_VARIABLE]; + error.correction_update((error_message.str()).c_str() , nb_variable); + } + + else { + 
for (j = 0;j < nb_variable;j++) { + if (iseq[i]->type[j] != type[j]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << STAT_label[STATL_VARIABLE] << " " << j + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[type[j]]); + } + } + } + } + + if (status) { + nb_sample++; + pseq = new const Sequences*[nb_sample]; + + pseq[0] = this; + for (i = 1;i < nb_sample;i++) { + pseq[i] = iseq[i - 1]; + } + + // computation of the number of sequences + + inb_sequence = 0; + for (i = 0;i < nb_sample;i++) { + inb_sequence += pseq[i]->nb_sequence; + } + + // comparison of the sequence identifiers + + iidentifier = new int[inb_sequence]; + + cumul_nb_sequence = 0; + i = 0; + for (j = 0;j < nb_sample;j++) { + for (k = 0;k < pseq[j]->nb_sequence;k++) { + iidentifier[i] = pseq[j]->identifier[k]; + + for (m = 0;m < cumul_nb_sequence;m++) { + if (iidentifier[i] == iidentifier[m]) { + delete [] iidentifier; + iidentifier = NULL; + break; + } + } + + if (!iidentifier) { + break; + } + i++; + } + + if (!iidentifier) { + break; + } + cumul_nb_sequence += pseq[j]->nb_sequence; + } + + // copy of sequence lengths + + ilength = new int[inb_sequence]; + + i = 0; + for (j = 0;j < nb_sample;j++) { + for (k = 0;k < pseq[j]->nb_sequence;k++) { + ilength[i++] = pseq[j]->length[k]; + } + } + + // comparison of vertex identifiers + + for (i = 0;i < nb_sample;i++) { + if (!(pseq[i]->vertex_identifier)) { + break; + } + } + + if (i == nb_sample) { + ivertex_identifier = new int*[inb_sequence]; + + cumul_nb_sequence = 0; + i = 0; + for (j = 0;j < nb_sample;j++) { + for (k = 0;k < pseq[j]->nb_sequence;k++) { + ivertex_identifier[i] = new int[pseq[j]->length[k]]; + for (m = 0;m < pseq[j]->length[k];m++) { + ivertex_identifier[i][m] = pseq[j]->vertex_identifier[k][m]; + + for (n = 0;n < cumul_nb_sequence;n++) { + for (p = 0;p < ilength[n];p++) { + if 
(ivertex_identifier[i][m] == ivertex_identifier[n][p]) { + for (q = 0;q <= i;q++) { + delete [] ivertex_identifier[q]; + } + delete [] ivertex_identifier; + ivertex_identifier = NULL; + break; + } + } + + if (!ivertex_identifier) { + break; + } + } + + if (!ivertex_identifier) { + break; + } + } + + if (!ivertex_identifier) { + break; + } + i++; + } + + if (!ivertex_identifier) { + break; + } + cumul_nb_sequence += pseq[j]->nb_sequence; + } + } + + else { + ivertex_identifier = NULL; + } + + seq = new Sequences(inb_sequence , iidentifier , ilength , ivertex_identifier , + index_param_type , nb_variable , type); + delete [] iidentifier; + delete [] ilength; + + if (ivertex_identifier) { + for (i = 0;i < inb_sequence;i++) { + delete [] ivertex_identifier[i]; + } + delete [] ivertex_identifier; + } + + phisto = new const FrequencyDistribution*[nb_sample]; + + // copy of index parameters + + if (seq->index_parameter) { + i = 0; + for (j = 0;j < nb_sample;j++) { + for (k = 0;k < pseq[j]->nb_sequence;k++) { + for (m = 0;m < (pseq[j]->index_param_type == POSITION ? 
pseq[j]->length[k] + 1 : pseq[j]->length[k]);m++) { + seq->index_parameter[i][m] = pseq[j]->index_parameter[k][m]; + } + i++; + } + } + + for (i = 0;i < nb_sample;i++) { + phisto[i] = pseq[i]->index_parameter_distribution; + } + seq->index_parameter_distribution = new FrequencyDistribution(nb_sample , phisto); + } + +// if ((seq->index_param_type == TIME) || ((seq->index_param_type == POSITION) && +// (seq->type[0] != NB_INTERNODE))) { + if ((seq->index_param_type == TIME) || (seq->index_param_type == POSITION)) { + for (i = 0;i < nb_sample;i++) { + phisto[i] = pseq[i]->index_interval; + } + seq->index_interval = new FrequencyDistribution(nb_sample , phisto); + } + + // copy of values + + i = 0; + for (j = 0;j < nb_sample;j++) { + for (k = 0;k < pseq[j]->nb_sequence;k++) { + for (m = 0;m < pseq[j]->nb_variable;m++) { + if ((pseq[j]->type[m] != REAL_VALUE) && (pseq[j]->type[m] != AUXILIARY)) { + for (n = 0;n < pseq[j]->length[k];n++) { + seq->int_sequence[i][m][n] = pseq[j]->int_sequence[k][m][n]; + } + } + + else { + for (n = 0;n < pseq[j]->length[k];n++) { + seq->real_sequence[i][m][n] = pseq[j]->real_sequence[k][m][n]; + } + } + } + i++; + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value[i] = pseq[0]->min_value[i]; + seq->max_value[i] = pseq[0]->max_value[i]; + for (j = 1;j < nb_sample;j++) { + if (pseq[j]->min_value[i] < seq->min_value[i]) { + seq->min_value[i] = pseq[j]->min_value[i]; + } + if (pseq[j]->max_value[i] > seq->max_value[i]) { + seq->max_value[i] = pseq[j]->max_value[i]; + } + } + + if (seq->type[i] != AUXILIARY) { + for (j = 0;j < nb_sample;j++) { + if (pseq[j]->marginal_distribution[i]) { + phisto[j] = pseq[j]->marginal_distribution[i]; + } + else { + break; + } + } + + if (j == nb_sample) { + seq->marginal_distribution[i] = new FrequencyDistribution(nb_sample , phisto); + } + + else { + seq->build_marginal_histogram(i); + } + } + } + + delete [] pseq; + delete [] phisto; + } + + return seq; +} + + 
/*--------------------------------------------------------------*/
/**
 *  \brief Merging of Sequences objects.
 *
 *  Convenience overload: builds an array of pointers from the container
 *  and delegates to the pointer-array version of merge().
 *
 *  \param[in] error     reference on a StatError object,
 *  \param[in] nb_sample number of Sequences objects,
 *  \param[in] iseq      pointer on the Sequences objects.
 *
 *  \return              Sequences object (result of the pointer-array version of merge()).
 */
/*--------------------------------------------------------------*/

Sequences* Sequences::merge(StatError &error , int nb_sample , const vector &iseq) const

{
  int i;
  Sequences *seq;
  const Sequences **pseq;


  // a temporary Sequences object is constructed from each of the first nb_sample elements
  pseq = new const Sequences*[nb_sample];
  for (i = 0;i < nb_sample;i++) {
    pseq[i] = new Sequences(iseq[i]);
  }

  seq = merge(error , nb_sample , pseq);

  // the temporary objects are released once the merging is done
  for (i = 0;i < nb_sample;i++) {
    delete pseq[i];
  }
  delete [] pseq;

  return seq;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Shifting of values of a variable.
 *
 *  \param[in] error       reference on a StatError object,
 *  \param[in] variable    variable index,
 *  \param[in] shift_param integer shifting parameter.
 *
 *  \return                Sequences object.
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::shift(StatError &error , int variable , int shift_param) const + +{ + bool status = true; + int i , j; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] == INT_VALUE) { + if (shift_param + min_value[variable] < INT_MIN) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_GREATER_THAN] << " " + << INT_MIN - min_value[variable]; + error.correction_update(STAT_error[STATR_SHIFT_VALUE] , (correction_message.str()).c_str()); + } + + if (shift_param + max_value[variable] > INT_MAX) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_SMALLER_THAN] << " " + << INT_MAX - max_value[variable]; + error.correction_update(STAT_error[STATR_SHIFT_VALUE] , (correction_message.str()).c_str()); + } + } + + else if (type[variable] != REAL_VALUE) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + } + + if (status) { + seq = new Sequences(*this , variable , type[variable]); + + switch (seq->type[variable]) { + + // shifting of integer values + + case INT_VALUE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->int_sequence[i][variable][j] = int_sequence[i][variable][j] + shift_param; + } + } + break; + } + + // shifting of real values + + case REAL_VALUE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j] + shift_param; + } + } + break; + } + } + + seq->min_value[variable] = 
min_value[variable] + shift_param; + seq->max_value[variable] = max_value[variable] + shift_param; + + if ((variable + 1 < seq->nb_variable) && (seq->type[variable + 1] == AUXILIARY)) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable + 1][j] = real_sequence[i][variable + 1][j] + shift_param; + } + } + + seq->min_value[variable + 1] = min_value[variable + 1] + shift_param; + seq->max_value[variable + 1] = max_value[variable + 1] + shift_param; + } + + if ((seq->type[variable] == INT_VALUE) && (seq->min_value[variable] >= 0) && + (seq->max_value[variable] <= MARGINAL_DISTRIBUTION_MAX_VALUE)) { + if (marginal_distribution[variable]) { + seq->marginal_distribution[variable] = new FrequencyDistribution(*marginal_distribution[variable] , + SHIFT , shift_param); + } + else { + seq->build_marginal_frequency_distribution(variable); + } + } + + else { + seq->build_marginal_histogram(variable); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Shifting of values of a real-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] shift_param real shifting parameter. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::shift(StatError &error , int variable , double shift_param) const + +{ + bool status = true; + int i , j; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] != REAL_VALUE) { + status = false; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , STAT_variable_word[REAL_VALUE]); + } + } + + if (status) { + seq = new Sequences(*this , variable , type[variable]); + + // shifting of real values + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j] + shift_param; + } + } + + seq->min_value[variable] = min_value[variable] + shift_param; + seq->max_value[variable] = max_value[variable] + shift_param; + + if ((variable + 1 < seq->nb_variable) && (seq->type[variable + 1] == AUXILIARY)) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable + 1][j] = real_sequence[i][variable + 1][j] + shift_param; + } + } + + seq->min_value[variable + 1] = min_value[variable + 1] + shift_param; + seq->max_value[variable + 1] = max_value[variable + 1] + shift_param; + } + + seq->build_marginal_histogram(variable); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Thresholding of values of a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] threshold integer threshold, + * \param[in] mode mode (ABOVE/BELOW). + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::thresholding(StatError &error , int variable , int threshold , + threshold_direction mode) const + +{ + bool status = true; + int i , j; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != REAL_VALUE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + if ((type[variable] == INT_VALUE) && (variable + 1 < nb_variable) && + (type[variable + 1] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (threshold <= min_value[variable]) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_GREATER_THAN] << " " << min_value[variable]; + error.correction_update(STAT_error[STATR_THRESHOLD_VALUE] , (correction_message.str()).c_str()); + } + + if (threshold >= max_value[variable]) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_SMALLER_THAN] << " " << max_value[variable]; + error.correction_update(STAT_error[STATR_THRESHOLD_VALUE] , (correction_message.str()).c_str()); + } + } + + if (status) { + seq = new Sequences(*this , variable , type[variable]); + + switch (seq->type[variable]) { + + // thresholding of integer values + + case INT_VALUE : { + switch (mode) { + + case ABOVE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + if (int_sequence[i][variable][j] > 
threshold) { + seq->int_sequence[i][variable][j] = threshold; + } + else { + seq->int_sequence[i][variable][j] = int_sequence[i][variable][j]; + } + } + } + break; + } + + case BELOW : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + if (int_sequence[i][variable][j] < threshold) { + seq->int_sequence[i][variable][j] = threshold; + } + else { + seq->int_sequence[i][variable][j] = int_sequence[i][variable][j]; + } + } + } + break; + } + } + + break; + } + + // thresholding of real values + + case REAL_VALUE : { + switch (mode) { + + case ABOVE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + if (real_sequence[i][variable][j] > threshold) { + seq->real_sequence[i][variable][j] = threshold; + } + else { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j]; + } + } + } + break; + } + + case BELOW : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + if (real_sequence[i][variable][j] < threshold) { + seq->real_sequence[i][variable][j] = threshold; + } + else { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j]; + } + } + } + break; + } + } + + break; + } + } + + switch (mode) { + case ABOVE : + seq->min_value[variable] = min_value[variable]; + seq->max_value[variable] = threshold; + break; + case BELOW : + seq->min_value[variable] = threshold; + seq->max_value[variable] = max_value[variable]; + break; + } + + if ((seq->type[variable] == INT_VALUE) && (seq->min_value[variable] >= 0) && + (seq->max_value[variable] <= MARGINAL_DISTRIBUTION_MAX_VALUE)) { + seq->build_marginal_frequency_distribution(variable); + } + else { + seq->build_marginal_histogram(variable); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Thresholding of values of a real-valued variable. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] threshold real threshold, + * \param[in] mode mode (ABOVE/BELOW). + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::thresholding(StatError &error , int variable , double threshold , + threshold_direction mode) const + +{ + bool status = true; + int i , j; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] != REAL_VALUE) { + status = false; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , STAT_variable_word[REAL_VALUE]); + } + + if (threshold <= min_value[variable]) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_GREATER_THAN] << " " << min_value[variable]; + error.correction_update(STAT_error[STATR_THRESHOLD_VALUE] , (correction_message.str()).c_str()); + } + + if (threshold >= max_value[variable]) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_SMALLER_THAN] << " " << max_value[variable]; + error.correction_update(STAT_error[STATR_THRESHOLD_VALUE] , (correction_message.str()).c_str()); + } + } + + if (status) { + seq = new Sequences(*this , variable , type[variable]); + + // thresholding of real values + + switch (mode) { + + case ABOVE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + if (real_sequence[i][variable][j] > threshold) { + seq->real_sequence[i][variable][j] = threshold; + } + else { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j]; + } + } + } + + seq->min_value[variable] = min_value[variable]; + seq->max_value[variable] = threshold; + break; + } + + case BELOW : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < 
seq->length[i];j++) { + if (real_sequence[i][variable][j] < threshold) { + seq->real_sequence[i][variable][j] = threshold; + } + else { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j]; + } + } + } + + seq->min_value[variable] = threshold; + seq->max_value[variable] = max_value[variable]; + break; + } + } + + seq->build_marginal_histogram(variable); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Clustering of values of a variable. + * + * \param[in] seq reference on a Sequences object, + * \param[in] variable variable index, + * \param[in] step clustering step, + * \param[in] mode mode (FLOOR/ROUND/CEIL). + */ +/*--------------------------------------------------------------*/ + +void Sequences::cluster(const Sequences &seq , int variable , int step , rounding mode) + +{ + int i , j; + + + switch (type[variable]) { + + // clustering of integer values + + case INT_VALUE : { + switch (mode) { + + case FLOOR : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + int_sequence[i][variable][j] = seq.int_sequence[i][variable][j] / step; + } + } + + min_value[variable] = (int)seq.min_value[variable] / step; + max_value[variable] = (int)seq.max_value[variable] / step; +// min_value[variable] = floor(seq.min_value[variable] / step); +// max_value[variable] = floor(seq.max_value[variable] / step); + break; + } + + case ROUND : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + int_sequence[i][variable][j] = (seq.int_sequence[i][variable][j] + step / 2) / step; +// int_sequence[i][variable][j] = (int)::round((double)seq.int_sequence[i][variable][j] / (double)step); + } + } + + min_value[variable] = ((int)seq.min_value[variable] + step / 2) / step; + max_value[variable] = ((int)seq.max_value[variable] + step / 2) / step; +// min_value[variable] = ::round(seq.min_value[variable] / step); +// max_value[variable] = ::round(seq.max_value[variable] / step); 
+ break; + } + + case CEIL : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + int_sequence[i][variable][j] = (seq.int_sequence[i][variable][j] + step - 1) / step; +// int_sequence[i][variable][j] = (int)ceil((double)seq.int_sequence[i][variable][j] / (double)step); + } + } + + min_value[variable] = ((int)seq.min_value[variable] + step - 1) / step; + max_value[variable] = ((int)seq.max_value[variable] + step - 1) / step; +// min_value[variable] = ceil(seq.min_value[variable] / step); +// max_value[variable] = ceil(seq.max_value[variable] / step); + break; + } + } + + if (seq.marginal_distribution[variable]) { + marginal_distribution[variable] = new FrequencyDistribution(*(seq.marginal_distribution[variable]) , + CLUSTER , step , mode); + } + else { + build_marginal_frequency_distribution(variable); + } + break; + } + + // clustering of real values + + case REAL_VALUE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + real_sequence[i][variable][j] = seq.real_sequence[i][variable][j] / step; + } + } + + min_value[variable] = seq.min_value[variable] / step; + max_value[variable] = seq.max_value[variable] / step; + + build_marginal_histogram(variable , seq.marginal_histogram[variable]->bin_width / step); + + if ((variable + 1 < nb_variable) && (type[variable + 1] == AUXILIARY)) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + real_sequence[i][variable + 1][j] = seq.real_sequence[i][variable + 1][j] / step; + } + } + + min_value[variable + 1] = seq.min_value[variable + 1] / step; + max_value[variable + 1] = seq.max_value[variable + 1] / step; + } + break; + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Clustering of values of a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] step clustering step, + * \param[in] mode mode (FLOOR/ROUND/CEIL). + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cluster(StatError &error , int variable , int step , rounding mode) const + +{ + bool status = true; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != REAL_VALUE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + if ((type[variable] == INT_VALUE) && (variable + 1 < nb_variable) && + (type[variable + 1] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + } + + if (step < 1) { + status = false; + error.update(STAT_error[STATR_CLUSTERING_STEP]); + } + + if (status) { + seq = new Sequences(*this , variable , type[variable]); + seq->cluster(*this , variable , step , mode); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Transcoding of categories of an integer-valued variable. + * + * \param[in] seq reference on a Sequences object, + * \param[in] ivariable variable index, + * \param[in] min_category lowest category, + * \param[in] max_category highest category, + * \param[in] category transcoding table, + * \param[in] add_variable flag for adding a variable. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::transcode(const Sequences &seq , int ivariable , int min_category , + int max_category , int *category , bool add_variable) + +{ + int i , j , k; + int variable , offset; + + + // copy of index parameters + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + + if (seq.index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) { + index_parameter[i][j] = seq.index_parameter[i][j]; + } + } + } + + if (add_variable) { + variable = 0; + offset = 1; + } + else { + variable = ivariable; + offset = 0; + } + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + + // transcoding of categories + + if (j == variable) { + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = category[seq.int_sequence[i][ivariable][k] - + (int)seq.min_value[variable]] + min_category; + } + } + + // copy of integer values + + else { + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = seq.int_sequence[i][j - offset][k]; + } + } + } + + // copy of real values + + else { + for (k = 0;k < length[i];k++) { + real_sequence[i][j][k] = seq.real_sequence[i][j - offset][k]; + } + } + } + } + + for (i = 0;i < nb_variable;i++) { + if (i == variable) { + min_value[i] = min_category; + max_value[i] = max_category; + + build_marginal_frequency_distribution(i); + } + + else { + min_value[i] = seq.min_value[i - offset]; + max_value[i] = seq.max_value[i - offset]; + + if (seq.marginal_distribution[i - offset]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[i - offset])); + } + if (seq.marginal_histogram[i - offset]) { + 
marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[i - offset])); + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Transcoding of categories of an integer-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] category transcoding table. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::transcode(StatError &error , int variable , int *category) const + +{ + bool status = true , *presence; + int i; + int min_category , max_category; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + if ((variable + 1 < nb_variable) && (type[variable + 1] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (status) { + min_category = category[0]; + max_category = category[0]; + + for (i = 1;i <= (int)(max_value[variable] - min_value[variable]);i++) { + if (category[i] < min_category) { + min_category = category[i]; + } + if (category[i] > max_category) { + max_category = category[i]; + } + } + + if (max_category - min_category == 0) { + status = false; + error.update(STAT_error[STATR_NB_CATEGORY]); + } + + if (max_category - min_category > (int)(max_value[variable] - min_value[variable])) { + status = false; + 
error.update(STAT_error[STATR_NON_CONSECUTIVE_CATEGORIES]); + } + } + + if (status) { + presence = new bool[max_category - min_category + 1]; + for (i = 0;i <= max_category - min_category;i++) { + presence[i] = false; + } + + for (i = 0;i <= (int)(max_value[variable] - min_value[variable]);i++) { + presence[category[i] - min_category] = true; + } + + for (i = 0;i <= max_category - min_category;i++) { + if (!presence[i]) { + status = false; + ostringstream error_message; + error_message << STAT_error[STATR_MISSING_CATEGORY] << " " << i + min_category; + error.update((error_message.str()).c_str()); + } + } + + delete [] presence; + } + + if (status) { + for (i = 0;i <= (int)(max_value[variable] - min_value[variable]);i++) { + category[i] -= min_category; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , nb_variable , type); + seq->transcode(*this , variable , min_category , max_category , category); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Transcoding of categories of an integer-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] category transcoding table. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::transcode(StatError &error , int variable , vector &category) const + +{ + return transcode(error , variable , category.data()); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit integer limits between classes (beginning of classes). + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cluster(StatError &error , int variable , + int nb_class , int *ilimit) const + +{ + bool status = true; + int i , j , k; + int *int_limit , *category; + variable_nature *itype; + double *real_limit; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE) && + (type[variable] != REAL_VALUE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else if ((nb_class < 2) || (nb_class >= (int)(max_value[variable] - min_value[variable]) + 1)) { + status = false; + error.update(STAT_error[STATR_NB_CLASS]); + } + + if ((variable + 1 < nb_variable) && (type[variable + 1] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + } + + if (status) { + if ((type[variable] == INT_VALUE) || (type[variable] == STATE)) { + int_limit = new int[nb_class + 1]; + int_limit[0] = (int)min_value[variable]; + for (i = 1;i < nb_class;i++) { + int_limit[i] = ilimit[i - 1]; + } + int_limit[nb_class] = (int)max_value[variable] + 1; + + for (i = 0;i < nb_class;i++) { + if (int_limit[i] >= int_limit[i + 1]) { + status = false; + error.update(STAT_error[STATR_CLUSTER_LIMIT]); + } + } + + if (status) { + category = new int[(int)(max_value[variable] - min_value[variable]) + 1]; + + i = 0; + for (j = 0;j < nb_class;j++) { + for (k = int_limit[j];k < int_limit[j + 1];k++) { + 
category[i++] = j; + } + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , nb_variable , type); + seq->transcode(*this , variable , 0 , nb_class - 1 , category); + + delete [] category; + } + + delete [] int_limit; + } + + else { + real_limit = new double[nb_class + 1]; + real_limit[0] = min_value[variable]; + for (i = 1;i < nb_class;i++) { + real_limit[i] = ilimit[i - 1]; + } + real_limit[nb_class] = max_value[variable] + 1; + + for (i = 0;i < nb_class;i++) { + if (real_limit[i] >= real_limit[i + 1]) { + status = false; + error.update(STAT_error[STATR_CLUSTER_LIMIT]); + } + } + + if (status) { + seq = new Sequences(*this , variable , INT_VALUE); + seq->cluster(*this , variable , nb_class , real_limit); + } + + delete [] real_limit; + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit integer limits between classes (beginning of classes). + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cluster(StatError &error , int variable , + int nb_class , vector &ilimit) const + +{ + return cluster(error , variable , nb_class , ilimit.data()); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of a real-valued variable. + * + * \param[in] seq reference on a Sequences object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] limit real limits between classes (beginning of classes). 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::cluster(const Sequences &seq , int variable , int nb_class , double *limit) + +{ + int i , j , k; + + + // grouping of real values + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + for (k = 0;k < nb_class;k++) { + if (seq.real_sequence[i][variable][j] < limit[k + 1]) { + int_sequence[i][variable][j] = k; + break; + } + } + } + } + + min_value_computation(variable); + max_value_computation(variable); + + build_marginal_frequency_distribution(variable); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of a real-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit real limits between classes (beginning of classes). + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cluster(StatError &error , int variable , + int nb_class , double *ilimit) const + +{ + bool status = true; + int i; + double *limit; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] != REAL_VALUE) { + status = false; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , STAT_variable_word[REAL_VALUE]); + } + + if ((variable + 1 < nb_variable) && (type[variable + 1] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + } + + if (nb_class < 2) { + status = false; + error.update(STAT_error[STATR_NB_CLASS]); + } + + if (status) { + limit = new double[nb_class + 1]; + 
limit[0] = min_value[variable]; + for (i = 1;i < nb_class;i++) { + limit[i] = ilimit[i - 1]; + } + limit[nb_class] = max_value[variable] + DOUBLE_ERROR; + + for (i = 0;i < nb_class;i++) { + if (limit[i] >= limit[i + 1]) { + status = false; + error.update(STAT_error[STATR_CLUSTER_LIMIT]); + } + } + + if (status) { + seq = new Sequences(*this , variable , INT_VALUE); + seq->cluster(*this , variable , nb_class , limit); + } + + delete [] limit; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Partitioning of values of a real-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_class number of classes, + * \param[in] ilimit real limits between classes (beginning of classes). + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cluster(StatError &error , int variable , + int nb_class , vector &ilimit) const + +{ + return cluster(error , variable , nb_class , ilimit.data()); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Scaling of a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] scaling_coeff integer scaling factor. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::scaling(StatError &error , int variable , int scaling_coeff) const + +{ + bool status = true; + int i , j; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] == INT_VALUE) { + if ((min_value[variable] * scaling_coeff < INT_MIN) || + (max_value[variable] * scaling_coeff > INT_MAX)) { + status = false; + error.update(STAT_error[STATR_SCALING_COEFF]); + } + } + + else if (type[variable] != REAL_VALUE) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + } + + if (scaling_coeff <= 1) { + status = false; + error.update(STAT_error[STATR_SCALING_COEFF]); + } + + if (status) { + seq = new Sequences(*this , variable , type[variable]); + + switch (seq->type[variable]) { + + // scaling of integer values + + case INT_VALUE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->int_sequence[i][variable][j] = int_sequence[i][variable][j] * scaling_coeff; + } + } + break; + } + + // scaling of real values + + case REAL_VALUE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j] * scaling_coeff; + } + } + break; + } + } + + seq->min_value[variable] = min_value[variable] * scaling_coeff; + seq->max_value[variable] = max_value[variable] * scaling_coeff; + + seq->build_marginal_frequency_distribution(variable); + + if ((variable + 1 < seq->nb_variable) && (seq->type[variable + 1] == AUXILIARY)) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + 
seq->real_sequence[i][variable + 1][j] = real_sequence[i][variable + 1][j] * scaling_coeff; + } + } + + seq->min_value[variable + 1] = min_value[variable + 1] * scaling_coeff; + seq->max_value[variable + 1] = max_value[variable + 1] * scaling_coeff; + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Scaling of a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] scaling_coeff real scaling factor. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::scaling(StatError &error , int variable , double scaling_coeff) const + +{ + bool status = true; + int i , j; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != REAL_VALUE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + } + + if (scaling_coeff <= 0) { + status = false; + error.update(STAT_error[STATR_SCALING_COEFF]); + } + + if (status) { + seq = new Sequences(*this , variable , REAL_VALUE); + + switch (type[variable]) { + + // scaling of integer values + + case INT_VALUE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable][j] = int_sequence[i][variable][j] * scaling_coeff; + } + } + break; + } + + // scaling of real values + + case REAL_VALUE : { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable][j] = real_sequence[i][variable][j] * scaling_coeff; + } + } + break; + } 
+ } + + seq->min_value[variable] = min_value[variable] * scaling_coeff; + seq->max_value[variable] = max_value[variable] * scaling_coeff; + + seq->build_marginal_histogram(variable); + + if ((variable + 1 < seq->nb_variable) && (seq->type[variable + 1] == AUXILIARY)) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->real_sequence[i][variable + 1][j] = real_sequence[i][variable + 1][j] * scaling_coeff; + } + } + + seq->min_value[variable + 1] = min_value[variable + 1] * scaling_coeff; + seq->max_value[variable + 1] = max_value[variable + 1] * scaling_coeff; + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Rounding of values of a real-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] mode mode (FLOOR/ROUND/CEIL). + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::round(StatError &error , int variable , rounding mode) const + +{ + bool status = true; + int i , j , k; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (variable != I_DEFAULT) { + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] != REAL_VALUE) { + status = false; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , STAT_variable_word[REAL_VALUE]); + } + } + } + + if (status) { + for (i = 0;i < nb_variable;i++) { + if (((variable == I_DEFAULT) && (type[i] == REAL_VALUE)) || (variable == i)) { + if (min_value[i] < INT_MIN) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_ROUNDED_VALUE]; + correction_message << STAT_error[STATR_GREATER_THAN] << " " << INT_MIN; + 
error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + if (max_value[i] > INT_MAX) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_ROUNDED_VALUE]; + correction_message << STAT_error[STATR_SMALLER_THAN] << " " << INT_MAX; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + } + } + + if (status) { + itype = new variable_nature[nb_variable]; + + if (variable == I_DEFAULT) { + for (i = 0;i < nb_variable;i++) { + if (type[i] == REAL_VALUE) { + itype[i] = INT_VALUE; + } + else { + itype[i] = type[i]; + } + } + } + + else { + for (i = 0;i < nb_variable;i++) { + itype[i] = type[i]; + } + itype[variable] = INT_VALUE; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , nb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < (seq->index_param_type == POSITION ? 
seq->length[i] + 1 : seq->length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->nb_variable;j++) { + + // copy of integer values + + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + for (k = 0;k < seq->length[i];k++) { + seq->int_sequence[i][j][k] = int_sequence[i][j][k]; + } + } + + else { + + // rounding of real values + + if (((variable == I_DEFAULT) && (type[j] == REAL_VALUE)) || (variable == j)) { + switch (mode) { + + case FLOOR : { + for (k = 0;k < seq->length[i];k++) { + seq->int_sequence[i][j][k] = (int)floor(real_sequence[i][j][k]); + } + break; + } + + case ROUND : { + for (k = 0;k < seq->length[i];k++) { + seq->int_sequence[i][j][k] = (int)::round(real_sequence[i][j][k]); + } + break; + } + + case CEIL : { + for (k = 0;k < seq->length[i];k++) { + seq->int_sequence[i][j][k] = (int)ceil(real_sequence[i][j][k]); + } + break; + } + } + } + + // copy of real values + + else { + for (k = 0;k < seq->length[i];k++) { + seq->real_sequence[i][j][k] = real_sequence[i][j][k]; + } + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + if (((variable == I_DEFAULT) && (type[i] == REAL_VALUE)) || (variable == i)) { + switch (mode) { + case FLOOR : + seq->min_value[i] = floor(min_value[i]); + seq->max_value[i] = floor(max_value[i]); + break; + case ROUND : + seq->min_value[i] = ::round(min_value[i]); + seq->max_value[i] = ::round(max_value[i]); + break; + case CEIL : + seq->min_value[i] = ceil(min_value[i]); + seq->max_value[i] = ceil(max_value[i]); + break; + } + + seq->build_marginal_frequency_distribution(i); + } + + else { + seq->min_value[i] = min_value[i]; + seq->max_value[i] = max_value[i]; + + if (marginal_distribution[i]) { + seq->marginal_distribution[i] = new FrequencyDistribution(*marginal_distribution[i]); + } + if (marginal_histogram[i]) { + seq->marginal_histogram[i] = new Histogram(*marginal_histogram[i]); + } + } + } + } + + return seq; +} + + 
+/*--------------------------------------------------------------*/ +/** + * \brief Selection of sequences taking values in a given range for the index parameter. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the selected individuals, + * \param[in] min_index_parameter lowest index parameter, + * \param[in] max_index_parameter highest index parameter, + * \param[in] keep flag for keeping or rejecting the selected sequences. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::index_parameter_select(StatError &error , ostream *os , + int min_index_parameter , + int max_index_parameter , bool keep) const + +{ + bool status = true; + int i , j; + int inb_sequence , *index , *iidentifier; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((index_param_type != TIME) && (index_param_type != POSITION)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + else { + if ((min_index_parameter < 0) || (min_index_parameter >= index_parameter_distribution->nb_value) || + (min_index_parameter > max_index_parameter)) { + status = false; + error.update(SEQ_error[SEQR_MIN_INDEX_PARAMETER]); + } + if ((max_index_parameter < index_parameter_distribution->offset) || + (max_index_parameter < min_index_parameter)) { + status = false; + error.update(SEQ_error[SEQR_MAX_INDEX_PARAMETER]); + } + } + + if (status) { + + // selection of sequences + + iidentifier = new int[nb_sequence]; + index = new int[nb_sequence]; + inb_sequence = 0; + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) { + if ((index_parameter[i][j] >= min_index_parameter) && + (index_parameter[i][j] <= max_index_parameter)) { + if (keep) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + break; + } + } + + if ((!keep) && (j == (index_param_type == POSITION ? 
length[i] + 1 : length[i]))) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + } + + if (inb_sequence == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + // copy of sequences + + if (status) { + if ((os) && (inb_sequence <= DISPLAY_NB_INDIVIDUAL)) { + *os << "\n" << SEQ_label[inb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << ": "; + for (i = 0;i < inb_sequence;i++) { + *os << iidentifier[i] << ", "; + } + *os << endl; + } + + seq = new Sequences(*this , inb_sequence , index); + } + + delete [] iidentifier; + delete [] index; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of sequences taking values in a given range for a variable. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the selected individuals, + * \param[in] variable variable index, + * \param[in] imin_value lowest integer value, + * \param[in] imax_value highest integer value, + * \param[in] keep flag for keeping or rejecting the selected sequences. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::value_select(StatError &error , ostream *os , int variable , + int imin_value , int imax_value , bool keep) const + +{ + bool status = true; + int i , j; + int inb_sequence , *index , *iidentifier; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE) && + (type[variable] != REAL_VALUE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if ((imin_value > max_value[variable]) || (imin_value > imax_value)) { + status = false; + error.update(STAT_error[STATR_MIN_VALUE]); + } + if ((imax_value < min_value[variable]) || (imax_value < imin_value)) { + status = false; + error.update(STAT_error[STATR_MAX_VALUE]); + } + } + } + + if (status) { + + // selection of sequences + + iidentifier = new int[nb_sequence]; + index = new int[nb_sequence]; + inb_sequence = 0; + + if (type[variable] != REAL_VALUE) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if ((int_sequence[i][variable][j] >= imin_value) && + (int_sequence[i][variable][j] <= imax_value)) { + if (keep) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + break; + } + } + + if ((!keep) && (j == length[i])) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if ((real_sequence[i][variable][j] >= imin_value) && + (real_sequence[i][variable][j] <= imax_value)) { + if (keep) { + 
iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + break; + } + } + + if ((!keep) && (j == length[i])) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + } + } + + if (inb_sequence == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + // copy of sequences + + if (status) { + if ((os) && (inb_sequence <= DISPLAY_NB_INDIVIDUAL)) { + *os << "\n" << SEQ_label[inb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << ": "; + for (i = 0;i < inb_sequence;i++) { + *os << iidentifier[i] << ", "; + } + *os << endl; + } + + seq = new Sequences(*this , inb_sequence , index); + } + + delete [] iidentifier; + delete [] index; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of sequences taking values in a given range for a real-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the selected individuals, + * \param[in] variable variable index, + * \param[in] imin_value lowest real value, + * \param[in] imax_value highest real value, + * \param[in] keep flag for keeping or rejecting the selected sequences. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::value_select(StatError &error , ostream *os , int variable , + double imin_value , double imax_value , bool keep) const + +{ + bool status = true; + int i , j; + int inb_sequence , *index , *iidentifier; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] != REAL_VALUE) { + status = false; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , STAT_variable_word[REAL_VALUE]); + } + + else { + if ((imin_value > max_value[variable]) || (imin_value > imax_value)) { + status = false; + error.update(STAT_error[STATR_MIN_VALUE]); + } + if ((imax_value < min_value[variable]) || (imax_value < imin_value)) { + status = false; + error.update(STAT_error[STATR_MAX_VALUE]); + } + } + } + + if (status) { + + // selection of sequences + + iidentifier = new int[nb_sequence]; + index = new int[nb_sequence]; + inb_sequence = 0; + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if ((real_sequence[i][variable][j] >= imin_value) && + (real_sequence[i][variable][j] <= imax_value)) { + if (keep) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + break; + } + } + + if ((!keep) && (j == length[i])) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + } + + if (inb_sequence == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + // copy of sequences + + if (status) { + if ((os) && (inb_sequence <= DISPLAY_NB_INDIVIDUAL)) { + *os << "\n" << SEQ_label[inb_sequence == 1 ? 
SEQL_SEQUENCE : SEQL_SEQUENCES] << ": "; + for (i = 0;i < inb_sequence;i++) { + *os << iidentifier[i] << ", "; + } + *os << endl; + } + + seq = new Sequences(*this , inb_sequence , index); + } + + delete [] iidentifier; + delete [] index; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of sequences by their identifiers. + * + * \param[in] error reference on a StatError object, + * \param[in] inb_sequence number of sequences, + * \param[in] iidentifier sequence identifiers, + * \param[in] keep flag for keeping or rejecting the selected individuals. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::select_individual(StatError &error , int inb_sequence , + int *iidentifier , bool keep) const + +{ + bool status = true; + int *index; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((inb_sequence < 1) || (inb_sequence > (keep ? nb_sequence : nb_sequence - 1))) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + status = selected_identifier_checking(error , nb_sequence , identifier , inb_sequence , + iidentifier , SEQ_label[SEQL_SEQUENCE]); + } + + if (status) { + index = identifier_select(nb_sequence , identifier , inb_sequence , iidentifier , keep); + + seq = new Sequences(*this , (keep ? inb_sequence : nb_sequence - inb_sequence) , index); + + delete [] index; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of sequences by their identifiers. + * + * \param[in] error reference on a StatError object, + * \param[in] inb_sequence number of sequences, + * \param[in] iidentifier sequence identifiers, + * \param[in] keep flag for keeping or rejecting the selected sequences. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::select_individual(StatError &error , int inb_sequence , + vector &iidentifier , bool keep) const + +{ + return select_individual(error , inb_sequence , iidentifier.data() , keep); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a Sequences object transforming the implicit index parameters in + * explicit index parameters. + * + * \param[in] error reference on a StatError object. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::explicit_index_parameter(StatError &error) const + +{ + Sequences *seq; + + + error.init(); + + if (index_parameter) { + seq = NULL; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + else { + seq = new Sequences(*this , EXPLICIT_INDEX_PARAMETER); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Removing of the index parameters. + * + * \param[in] error reference on a StatError object. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::remove_index_parameter(StatError &error) const + +{ + Sequences *seq; + + + error.init(); + + if (!index_parameter) { + seq = NULL; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + else { + seq = new Sequences(*this , REMOVE_INDEX_PARAMETER); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of variables. + * + * \param[in] seq reference on a Sequences object, + * \param[in] variable variable indices. 
+ */ +/*--------------------------------------------------------------*/ + +void Sequences::select_variable(const Sequences &seq , int *variable) + +{ + int i , j , k; + + + // copy of index parameters + + if (seq.index_parameter_distribution) { + index_parameter_distribution = new FrequencyDistribution(*(seq.index_parameter_distribution)); + } + if (seq.index_interval) { + index_interval = new FrequencyDistribution(*(seq.index_interval)); + } + + if (seq.index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) { + index_parameter[i][j] = seq.index_parameter[i][j]; + } + } + } + + // copy of values + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if ((type[j] != REAL_VALUE) && (type[j] != AUXILIARY)) { + for (k = 0;k < length[i];k++) { + int_sequence[i][j][k] = seq.int_sequence[i][variable[j]][k]; + } + } + + else { + for (k = 0;k < length[i];k++) { + real_sequence[i][j][k] = seq.real_sequence[i][variable[j]][k]; + } + } + } + } + + for (i = 0;i < nb_variable;i++) { + min_value[i] = seq.min_value[variable[i]]; + max_value[i] = seq.max_value[variable[i]]; + + if (seq.marginal_distribution[variable[i]]) { + marginal_distribution[i] = new FrequencyDistribution(*(seq.marginal_distribution[variable[i]])); + } + if (seq.marginal_histogram[variable[i]]) { + marginal_histogram[i] = new Histogram(*(seq.marginal_histogram[variable[i]])); + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of variables. + * + * \param[in] error reference on a StatError object, + * \param[in] inb_variable number of variables, + * \param[in] ivariable variable indices, + * \param[in] keep flag for keeping or rejecting the selected variables. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::select_variable(StatError &error , int inb_variable , + int *ivariable , bool keep) const + +{ + bool status = true , *selected_variable; + int i; + int bnb_variable , *variable; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((inb_variable < 1) || (inb_variable > (keep ? nb_variable : nb_variable - 1))) { + status = false; + error.update(STAT_error[STATR_NB_SELECTED_VARIABLE]); + } + + else { + selected_variable = new bool[nb_variable + 1]; + for (i = 1;i <= nb_variable;i++) { + selected_variable[i] = false; + } + + for (i = 0;i < inb_variable;i++) { + if ((ivariable[i] < 1) || (ivariable[i] > nb_variable)) { + status = false; + ostringstream error_message; + error_message << ivariable[i] << ": " << STAT_error[STATR_VARIABLE_INDEX]; + error.update((error_message.str()).c_str()); + } + + else if (selected_variable[ivariable[i]]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << ivariable[i] << " " + << STAT_error[STATR_ALREADY_SELECTED]; + error.update((error_message.str()).c_str()); + } + else { + selected_variable[ivariable[i]] = true; + } + } + + delete [] selected_variable; + } + + if (status) { + variable = ::select_variable(nb_variable , inb_variable , ivariable , keep); + + bnb_variable = (keep ? 
inb_variable : nb_variable - inb_variable); + + for (i = 0;i < bnb_variable;i++) { + if ((type[variable[i]] == AUXILIARY) && + ((i == 0) || (variable[i - 1] != variable[i] - 1))) { + status = false; + error.update(SEQ_error[SEQR_VARIABLE_INDICES]); + } + } + + if (status) { + itype = new variable_nature[bnb_variable]; + for (i = 0;i < bnb_variable;i++) { + itype[i] = type[variable[i]]; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , bnb_variable , itype); + seq->select_variable(*this , variable); + + delete [] itype; + } + + delete [] variable; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of variables. + * + * \param[in] error reference on a StatError object, + * \param[in] inb_variable number of variables, + * \param[in] ivariable variable indices, + * \param[in] keep flag for keeping or rejecting the selected variables. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::select_variable(StatError &error , int inb_variable , + vector &ivariable , bool keep) const + +{ + return select_variable(error , inb_variable , ivariable.data() , keep); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Summation of variables. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_summed_variable number of variables to be summed, + * \param[in] variable variable indices. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::sum_variable(StatError &error , int nb_summed_variable , int *ivariable) const + +{ + bool status = true , *selected_variable; + int i , j , k , m , n; + int inb_variable , *copied_variable , *summed_variable; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + + if ((nb_summed_variable < 2) || (nb_summed_variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_NB_SUMMED_VARIABLE]); + } + + else { + selected_variable = new bool[nb_variable + 1]; + for (i = 1;i <= nb_variable;i++) { + selected_variable[i] = false; + } + + for (i = 0;i < nb_summed_variable;i++) { + if ((ivariable[i] < 1) || (ivariable[i] > nb_variable)) { + status = false; + ostringstream error_message; + error_message << ivariable[i] << ": " << STAT_error[STATR_VARIABLE_INDEX]; + error.update((error_message.str()).c_str()); + } + + else { + if (selected_variable[ivariable[i]]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << ivariable[i] << " " + << STAT_error[STATR_ALREADY_SELECTED]; + error.update((error_message.str()).c_str()); + } + + else { + selected_variable[ivariable[i]] = true; + + if ((type[ivariable[i] - 1] != INT_VALUE) && (type[ivariable[i] - 1] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << ivariable[i] << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if ((ivariable[i] < nb_variable) && (type[ivariable[i]] == AUXILIARY)) { + status = false; + ostringstream error_message; + error_message << 
STAT_label[STATL_VARIABLE] << " " << ivariable[i] + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.update((error_message.str()).c_str()); + } + } + } + } + + delete [] selected_variable; + } + + if (status) { + inb_variable = nb_variable - nb_summed_variable + 1; + for (i = 0;i < nb_summed_variable;i++) { + ivariable[i]--; + } + + summed_variable = new int[nb_summed_variable]; + copied_variable = new int[nb_variable - nb_summed_variable]; + i = 0; + j = 0; + for (k = 0;k < nb_variable;k++) { + for (m = 0;m < nb_summed_variable;m++) { + if (ivariable[m] == k) { + summed_variable[i++] = k; + break; + } + } + if (m == nb_summed_variable) { + copied_variable[j++] = k; + } + } + + itype = new variable_nature[inb_variable]; + i = 0; + for (j = 0;j < inb_variable;j++) { + if (j == summed_variable[0]) { + itype[j] = type[j]; + for (k = 1;k < nb_summed_variable;k++) { + if (type[summed_variable[k]] == REAL_VALUE) { + itype[j] = REAL_VALUE; + } + } + } + else { + itype[j] = type[copied_variable[i++]]; + } + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , inb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*(index_parameter_distribution)); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*(index_interval)); + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? 
length[i] + 1 : length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + k = 0; + for (m = 0;m < inb_variable;m++) { + + // summation of values + + if (m == summed_variable[0]) { + switch (seq->type[m]) { + + case INT_VALUE : { + seq->int_sequence[i][m][j] = 0.; + + for (n = 0;n < nb_summed_variable;n++) { + switch (type[summed_variable[n]]) { + case INT_VALUE : + seq->int_sequence[i][m][j] += int_sequence[i][summed_variable[n]][j]; + break; + case REAL_VALUE : + seq->int_sequence[i][m][j] += real_sequence[i][summed_variable[n]][j]; + break; + } + } + break; + } + + case REAL_VALUE : { + seq->real_sequence[i][m][j] = 0.; + + for (n = 0;n < nb_summed_variable;n++) { + switch (type[summed_variable[n]]) { + case INT_VALUE : + seq->real_sequence[i][m][j] += int_sequence[i][summed_variable[n]][j]; + break; + case REAL_VALUE : + seq->real_sequence[i][m][j] += real_sequence[i][summed_variable[n]][j]; + break; + } + } + break; + } + } + } + + // copy of values + + else { + switch (seq->type[m]) { + case INT_VALUE : + seq->int_sequence[i][m][j] = int_sequence[i][copied_variable[k++]][j]; + break; + case REAL_VALUE : + seq->real_sequence[i][m][j] = real_sequence[i][copied_variable[k++]][j]; + break; + } + } + } + } + } + + i = 0; + for (j = 0;j < inb_variable;j++) { + if (j == summed_variable[0]) { + seq->min_value_computation(j); + seq->max_value_computation(j); + + seq->build_marginal_frequency_distribution(j); + } + + else { + seq->min_value[j] = min_value[copied_variable[i]]; + seq->max_value[j] = max_value[copied_variable[i]]; + + if (marginal_distribution[copied_variable[i]]) { + seq->marginal_distribution[j] = new FrequencyDistribution(*(marginal_distribution[copied_variable[i]])); + } + if (marginal_histogram[copied_variable[i]]) { + seq->marginal_histogram[j] = new Histogram(*(marginal_histogram[copied_variable[i]])); + } + i++; + } + } + + delete [] summed_variable; 
+ delete [] copied_variable; + } + + return seq; +} + + + +/*--------------------------------------------------------------*/ +/** + * \brief Summation of variables. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_summed_variable number of variables to be summed, + * \param[in] variable variable indices. + * + * \return Vectors object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::sum_variable(StatError &error , int nb_summed_variable , vector &ivariable) const + +{ + return sum_variable(error , nb_summed_variable , ivariable.data()); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Merging of variables of Sequences objects. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sample number of Sequences objects, + * \param[in] iseq pointer on the Sequences objects, + * \param[in] ref_sample reference Sequences object for the identifiers. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::merge_variable(StatError &error , int nb_sample , + const Sequences **iseq , int ref_sample) const + +{ + bool status = true; + int i , j , k , m; + int inb_variable , *iidentifier , **ivertex_identifier; + variable_nature *itype; + Sequences *seq; + const Sequences **pseq; + + + seq = NULL; + error.init(); + + for (i = 0;i < nb_sample;i++) { + if ((iseq[i]->vertex_identifier) && (!vertex_identifier)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_error[SEQR_SAMPLE_VERTEX_IDENTIFIER]; + error.update((error_message.str()).c_str()); + } + + if ((iseq[i]->index_param_type != IMPLICIT_TYPE) && + (iseq[i]->index_param_type != index_param_type)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_error[SEQR_INDEX_PARAMETER_TYPE]; + + if (index_param_type == IMPLICIT_TYPE) { + error.update((error_message.str()).c_str()); + } + else { + error.correction_update((error_message.str()).c_str() , SEQ_index_parameter_word[index_param_type]); + } + } + + if (iseq[i]->nb_sequence != nb_sequence) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_error[SEQR_NB_SEQUENCE]; + error.update((error_message.str()).c_str()); + } + + else { + for (j = 0;j < nb_sequence;j++) { + if (iseq[i]->length[j] != length[j]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << ": " + << SEQ_error[SEQR_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + + else { + if ((iseq[i]->vertex_identifier) && (vertex_identifier)) { + for (k = 0;k < length[j];k++) { + if (iseq[i]->vertex_identifier[j][k] != vertex_identifier[j][k]) { + status = false; + 
ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << ": " + << SEQ_label[SEQL_VERTEX_IDENTIFIER] << " " << k << ": " + << SEQ_error[SEQR_VERTEX_IDENTIFIER]; + error.update((error_message.str()).c_str()); + } + } + } + + if ((iseq[i]->index_param_type != IMPLICIT_TYPE) && + (iseq[i]->index_param_type == index_param_type)) { + for (k = 0;k < (index_param_type == POSITION ? length[j] + 1 : length[j]);k++) { + if (iseq[i]->index_parameter[j][k] != index_parameter[j][k]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_SAMPLE] << " " << i + 2 << ": " + << SEQ_label[SEQL_SEQUENCE] << " " << j + 1 << ": " + << SEQ_label[SEQL_INDEX] << " " << k << ": " + << SEQ_error[SEQR_INDEX_PARAMETER]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + } + + if ((ref_sample != I_DEFAULT) && ((ref_sample < 1) || (ref_sample > nb_sample + 1))) { + status = false; + error.update(STAT_error[STATR_SAMPLE_INDEX]); + } + + if (status) { + nb_sample++; + pseq = new const Sequences*[nb_sample]; + + pseq[0] = this; + inb_variable = nb_variable; + for (i = 1;i < nb_sample;i++) { + pseq[i] = iseq[i - 1]; + inb_variable += iseq[i - 1]->nb_variable; + } + + // comparison of sequence identifiers + + if (ref_sample == I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_sample;j++) { + if (pseq[j]->identifier[i] != pseq[0]->identifier[i]) { + break; + } + } + if (j < nb_sample) { + break; + } + } + + if (i < nb_sequence) { + iidentifier = NULL; + } + else { + iidentifier = pseq[0]->identifier; + } + } + + else { + ref_sample--; + iidentifier = pseq[ref_sample]->identifier; + } + + // comparison of vertex identifiers + + if (ref_sample == I_DEFAULT) { + for (i = 0;i < nb_sample;i++) { + if (!(pseq[i]->vertex_identifier)) { + break; + } + } + + if (i == nb_sample) { + for (i = 0;i < nb_sequence;i++) { + for (j = 1;j < nb_sample;j++) { + 
for (k = 0;k < pseq[j]->length[i];k++) { + if (pseq[j]->vertex_identifier[i][k] != pseq[0]->vertex_identifier[i][k]) { + break; + } + } + + if (k < pseq[j]->length[i]) { + break; + } + } + + if (j < nb_sample) { + break; + } + } + + if (i < nb_sequence) { + ivertex_identifier = NULL; + } + else { + ivertex_identifier = pseq[0]->vertex_identifier; + } + } + + else { + ivertex_identifier = NULL; + } + } + + else { + ivertex_identifier = pseq[ref_sample]->vertex_identifier; + } + + itype = new variable_nature[inb_variable]; + inb_variable = 0; + for (i = 0;i < nb_sample;i++) { + for (j = 0;j < pseq[i]->nb_variable;j++) { + itype[inb_variable] = pseq[i]->type[j]; + if ((inb_variable > 0) && (itype[inb_variable] == STATE)) { + itype[inb_variable] = INT_VALUE; + } + inb_variable++; + } + } + + seq = new Sequences(nb_sequence , iidentifier , length , ivertex_identifier , + index_param_type , inb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? 
length[i] + 1 : length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + // copy of values + + for (i = 0;i < nb_sequence;i++) { + inb_variable = 0; + for (j = 0;j < nb_sample;j++) { + for (k = 0;k < pseq[j]->nb_variable;k++) { + if ((seq->type[inb_variable] != REAL_VALUE) && (seq->type[inb_variable] != AUXILIARY)) { + for (m = 0;m < length[i];m++) { + seq->int_sequence[i][inb_variable][m] = pseq[j]->int_sequence[i][k][m]; + } + } + + else { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][inb_variable][m] = pseq[j]->real_sequence[i][k][m]; + } + } + + inb_variable++; + } + } + } + + inb_variable = 0; + for (i = 0;i < nb_sample;i++) { + for (j = 0;j < pseq[i]->nb_variable;j++) { + seq->min_value[inb_variable] = pseq[i]->min_value[j]; + seq->max_value[inb_variable] = pseq[i]->max_value[j]; + + if (pseq[i]->marginal_distribution[j]) { + seq->marginal_distribution[inb_variable] = new FrequencyDistribution(*(pseq[i]->marginal_distribution[j])); + } + if (pseq[i]->marginal_histogram[j]) { + seq->marginal_histogram[inb_variable] = new Histogram(*(pseq[i]->marginal_histogram[j])); + } + + inb_variable++; + } + } + + delete [] pseq; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Merging of variables of Sequences objects. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sample number of Sequences objects, + * \param[in] iseq pointer on the Sequences objects, + * \param[in] ref_sample reference Sequences object for the identifiers. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::merge_variable(StatError &error , int nb_sample , + const vector &iseq , int ref_sample) const + +{ + int i; + Sequences *seq; + const Sequences **pseq; + + + pseq = new const Sequences*[nb_sample]; + for (i = 0;i < nb_sample;i++) { + pseq[i] = new Sequences(iseq[i]); + } + + seq = merge_variable(error , nb_sample , pseq , ref_sample); + + for (i = 0;i < nb_sample;i++) { + delete pseq[i]; + } + delete [] pseq; + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Differences between data and residuals in order to build auxiliary variables. + * + * \param[in] error reference on a StatError object, + * \param[in] residual reference on a Sequences object. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::difference_variable(StatError &error , const Sequences &residual) const + +{ + bool status = true; + int i , j , k , m; + int offset , inb_variable; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (((residual.vertex_identifier) && (!vertex_identifier)) || + ((!(residual.vertex_identifier)) && (vertex_identifier))) { + status = false; + error.update(SEQ_error[SEQR_SAMPLE_VERTEX_IDENTIFIER]); + } + + if (residual.index_param_type != index_param_type) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_INDEX_PARAMETER_TYPE]; + + if (index_param_type == IMPLICIT_TYPE) { + error.update((error_message.str()).c_str()); + } + else { + error.correction_update((error_message.str()).c_str() , SEQ_index_parameter_word[index_param_type]); + } + } + + if (residual.nb_sequence != nb_sequence) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + for (i = 0;i < nb_sequence;i++) { + if (residual.identifier[i] != identifier[i]) { + status = false; + 
ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_error[SEQR_SEQUENCE_IDENTIFIER]; + error.update((error_message.str()).c_str()); + } + + if (residual.length[i] != length[i]) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_error[SEQR_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + + else { + if ((residual.vertex_identifier) && (vertex_identifier)) { + for (j = 0;j < length[i];j++) { + if (residual.vertex_identifier[i][j] != vertex_identifier[i][j]) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_label[SEQL_VERTEX_IDENTIFIER] << " " << j << ": " + << SEQ_error[SEQR_VERTEX_IDENTIFIER]; + error.update((error_message.str()).c_str()); + } + } + } + + if ((residual.index_param_type != IMPLICIT_TYPE) && + (residual.index_param_type == index_param_type)) { + for (j = 0;j < (index_param_type == POSITION ? 
length[i] + 1 : length[i]);j++) { + if (residual.index_parameter[i][j] != index_parameter[i][j]) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_label[SEQL_INDEX] << " " << j << ": " + << SEQ_error[SEQR_INDEX_PARAMETER]; + error.update((error_message.str()).c_str()); + } + } + } + } + } + } + + if (residual.nb_variable != nb_variable) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , nb_variable); + } + + else { + if (type[0] == STATE) { + offset = 1; + + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + + if (residual.type[0] != STATE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " 1: " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[STATE]); + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + if (residual.int_sequence[i][0][j] != int_sequence[i][0][j]) { + status = false; + ostringstream error_message , correction_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_label[SEQL_INDEX_PARAMETER] << " " << j << ": " + << SEQ_error[SEQR_STATE]; + correction_message << STAT_label[STATL_STATE] << " " << int_sequence[i][0][j]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + } + } + } + + else { + offset = 0; + } + + for (i = offset;i < nb_variable;i++) { + if (residual.type[i] != REAL_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[REAL_VALUE]); + } + } + } + + if (status) { + inb_variable = offset + (nb_variable - offset) * 2; + + itype = new variable_nature[inb_variable]; + + if 
(type[0] == STATE) { + itype[0] = type[0]; + } + i = offset; + for (j = offset;j < nb_variable;j++) { + itype[i++] = type[j]; + itype[i++] = AUXILIARY; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , inb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < (index_param_type == POSITION ? length[i] + 1 : length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + // copy of variables and building of auxiliary variables + + for (i = 0;i < nb_sequence;i++) { + if (type[0] == STATE) { + for (j = 0;j < length[i];j++) { + seq->int_sequence[i][0][j] = int_sequence[i][0][j]; + } + } + + j = offset; + for (k = offset;k < nb_variable;k++) { + switch (type[k]) { + + case INT_VALUE : { + for (m = 0;m < length[i];m++) { + seq->int_sequence[i][j][m] = int_sequence[i][k][m]; + } + j++; + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = int_sequence[i][k][m] - residual.real_sequence[i][k][m]; + } + j++; + break; + } + + case REAL_VALUE : { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = real_sequence[i][k][m]; + } + j++; + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = real_sequence[i][k][m] - residual.real_sequence[i][k][m]; + } + j++; + break; + } + } + } + } + + if (type[0] == STATE) { + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + i = offset; + for (j = offset;j < nb_variable;j++) { + seq->min_value[i] = min_value[j]; + seq->max_value[i] = max_value[j]; + + if (marginal_distribution[j]) { + 
seq->marginal_distribution[i] = new FrequencyDistribution(*marginal_distribution[j]); + } + else { + seq->marginal_distribution[i] = NULL; + } + + if (marginal_histogram[j]) { + seq->marginal_histogram[i] = new Histogram(*marginal_histogram[j]); + } + else { + seq->marginal_histogram[i] = NULL; + } + i++; + + seq->min_value_computation(i); + seq->max_value_computation(i); + i++; + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Variable shift. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] lag lag. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::shift_variable(StatError &error , int variable , int lag) const + +{ + bool status = true; + int i , j , k; + int inb_sequence , *iidentifier , *ilength , *index , *pvertex_id , *cvertex_id , + *pindex_param , *cindex_param , *pisequence , *cisequence; + double *prsequence , *crsequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (index_param_type == POSITION) { + error.update(SEQ_error[SEQR_INDEX_PARAMETER]); + status = false; + } + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (type[variable] == AUXILIARY) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + } + + if ((lag == 0) || (abs(lag) > length_distribution->mean)) { + status = false; + error.update(STAT_error[SEQR_VARIABLE_LAG]); + } + + if (status) { + + // computation of the sequence lengths + + iidentifier = new int[nb_sequence]; + ilength = new int[nb_sequence]; + index = new 
int[nb_sequence]; + inb_sequence = 0; + + for (i = 0;i < nb_sequence;i++) { + if (abs(lag) < length[i]) { + iidentifier[inb_sequence] = identifier[i]; + ilength[inb_sequence] = length[i] - abs(lag); + index[inb_sequence++] = i; + } + } + + seq = new Sequences(inb_sequence , iidentifier , ilength , vertex_identifier , + index_param_type , nb_variable , type , false); + + // copy of vertex identifiers + + if (vertex_identifier) { + for (i = 0;i < seq->nb_sequence;i++) { + pvertex_id = seq->vertex_identifier[i]; + + if (lag > 0) { + cvertex_id = vertex_identifier[index[i]] + lag; + } + else { + cvertex_id = vertex_identifier[index[i]]; + } + + for (j = 0;j < seq->length[i];j++) { + *pvertex_id++ = *cvertex_id++; + } + } + } + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + pindex_param = seq->index_parameter[i]; + + if (lag > 0) { + cindex_param = index_parameter[index[i]] + lag; + } + else { + cindex_param = index_parameter[index[i]]; + } + + for (j = 0;j < seq->length[i];j++) { + *pindex_param++ = *cindex_param++; + } + } + + seq->build_index_parameter_frequency_distribution(); + seq->index_interval_computation(); + } + + // copy of values + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->nb_variable;j++) { + if ((seq->type[j] != REAL_VALUE) && (seq->type[j] != AUXILIARY)) { + pisequence = seq->int_sequence[i][j]; + + if ((j != variable) && (lag > 0)) { + cisequence = int_sequence[index[i]][j] + lag; + } + else if ((j == variable) && (lag < 0)) { + cisequence = int_sequence[index[i]][j] - lag; + } + else { + cisequence = int_sequence[index[i]][j]; + } + + for (k = 0;k < seq->length[i];k++) { + *pisequence++ = *cisequence++; + } + } + + else { + prsequence = seq->real_sequence[i][j]; + + if ((j != variable) && (lag > 0)) { + crsequence = real_sequence[index[i]][j] + lag; + } + else if ((j == variable) && (lag < 0)) { + crsequence = real_sequence[index[i]][j] - lag; + } + else { + crsequence = 
real_sequence[index[i]][j]; + } + + for (k = 0;k < seq->length[i];k++) { + *prsequence++ = *crsequence++; + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + if (seq->type[i] != AUXILIARY) { + seq->build_marginal_frequency_distribution(i); + } + } + + delete [] iidentifier; + delete [] ilength; + delete [] index; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Reversing of the direction of sequences. + * + * \param[in] error reference on a StatError object. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::reverse(StatError &error) const + +{ + Sequences *seq; + + + error.init(); + + if (index_param_type == TIME) { + seq = NULL; + error.update(SEQ_error[SEQR_INDEX_PARAMETER]); + } + + else { + seq = new Sequences(*this , REVERSE); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of sequences on a sequence length criterion. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the selected individuals, + * \param[in] min_length lowest sequence length, + * \param[in] imax_length highest sequence length, + * \param[in] keep flag for keeping or rejecting the selected sequences. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::length_select(StatError &error , ostream *os , int min_length , + int imax_length , bool keep) const + +{ + bool status = true; + int i; + int inb_sequence , *index , *iidentifier; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((min_length < 1) || (min_length >= length_distribution->nb_value) || (min_length > imax_length)) { + status = false; + error.update(SEQ_error[SEQR_MIN_SEQUENCE_LENGTH]); + } + if ((imax_length < length_distribution->offset) || (imax_length < min_length)) { + status = false; + error.update(SEQ_error[SEQR_MAX_SEQUENCE_LENGTH]); + } + + if (status) { + + // selection of sequences + + iidentifier = new int[nb_sequence]; + index = new int[nb_sequence]; + inb_sequence = 0; + + for (i = 0;i < nb_sequence;i++) { + if ((length[i] >= min_length) && (length[i] <= imax_length)) { + if (keep) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + } + + else if (!keep) { + iidentifier[inb_sequence] = identifier[i]; + index[inb_sequence++] = i; + } + } + + if (inb_sequence == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + // copy of selected sequences + + if (status) { + if ((os) && (inb_sequence <= DISPLAY_NB_INDIVIDUAL)) { + *os << "\n" << SEQ_label[inb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << ": "; + for (i = 0;i < inb_sequence;i++) { + *os << iidentifier[i] << ", "; + } + *os << endl; + } + + seq = new Sequences(*this , inb_sequence , index); + } + + delete [] iidentifier; + delete [] index; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Removing of the first/last run for a given value. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] ivalue value, + * \param[in] position position (BEGIN_RUN/END_RUN), + * \param[in] max_run_length maximum length of the removed runs. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::remove_run(StatError &error , int variable , int ivalue , + run_position position , int max_run_length) const + +{ + bool status = true; + int i , j , k; + int smax_length , inb_sequence , *iidentifier , *ilength , *index , *pvertex_id , + *cvertex_id , *pindex_param , *cindex_param , *pisequence , *cisequence; + double *prsequence , *crsequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (index_param_type == POSITION) { + error.update(SEQ_error[SEQR_INDEX_PARAMETER]); + status = false; + } + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE) && + (type[variable] != REAL_VALUE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if (!marginal_distribution[variable]) { + if ((ivalue < min_value[variable]) || (ivalue > max_value[variable])) { + status = false; + error.update(STAT_error[STATR_VALUE]); + } +/* status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); */ + } + + else if ((ivalue < marginal_distribution[variable]->offset) || + (ivalue >= marginal_distribution[variable]->nb_value) || 
+ (marginal_distribution[variable]->frequency[ivalue] == 0)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_label[STATL_VALUE] << " " << ivalue << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + } + } + + if ((max_run_length < I_DEFAULT) || (max_run_length == 0)) { + status = false; + error.update(SEQ_error[SEQR_MAX_RUN_LENGTH]); + } + + if (status) { + + // computation of the sequence lengths + + iidentifier = new int[nb_sequence]; + ilength = new int[nb_sequence]; + index = new int[nb_sequence]; + inb_sequence = 0; + + for (i = 0;i < nb_sequence;i++) { + if (max_run_length == I_DEFAULT) { + smax_length = length[i]; + } + else { + smax_length = MIN(max_run_length , length[i]); + } + + switch (position) { + + case BEGIN_RUN : { + if (type[variable] != REAL_VALUE) { + cisequence = int_sequence[i][variable]; + for (j = 0;j < smax_length;j++) { + if (*cisequence++ != ivalue) { + break; + } + } + } + + else { + crsequence = real_sequence[i][variable]; + for (j = 0;j < smax_length;j++) { + if (*crsequence++ != (double)ivalue) { + break; + } + } + } + break; + } + + case END_RUN : { + if (type[variable] != REAL_VALUE) { + cisequence = int_sequence[i][variable] + length[i]; + for (j = 0;j < smax_length;j++) { + if (*--cisequence != ivalue) { + break; + } + } + } + + else { + crsequence = real_sequence[i][variable] + length[i]; + for (j = 0;j < smax_length;j++) { + if (*--crsequence != (double)ivalue) { + break; + } + } + } + break; + } + } + + if (j < length[i]) { + iidentifier[inb_sequence] = identifier[i]; + ilength[inb_sequence] = length[i] - j; + index[inb_sequence++] = i; + } + } + + seq = new Sequences(inb_sequence , iidentifier , ilength , vertex_identifier , + index_param_type , nb_variable , type , false); + + // copy of vertex identifiers + + if (vertex_identifier) { + for (i = 0;i < seq->nb_sequence;i++) { + pvertex_id = 
seq->vertex_identifier[i]; + + switch (position) { + case BEGIN_RUN : + cvertex_id = vertex_identifier[index[i]] + length[index[i]] - seq->length[i]; + break; + case END_RUN : + cvertex_id = vertex_identifier[index[i]]; + break; + } + + for (j = 0;j < seq->length[i];j++) { + *pvertex_id++ = *cvertex_id++; + } + } + } + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + pindex_param = seq->index_parameter[i]; + + switch (position) { + case BEGIN_RUN : + cindex_param = index_parameter[index[i]] + length[index[i]] - seq->length[i]; + break; + case END_RUN : + cindex_param = index_parameter[index[i]]; + break; + } + + for (j = 0;j < seq->length[i];j++) { + *pindex_param++ = *cindex_param++; + } + } + + seq->build_index_parameter_frequency_distribution(); + seq->index_interval_computation(); + } + + // copy of values + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->nb_variable;j++) { + if ((seq->type[j] != REAL_VALUE) && (seq->type[j] != AUXILIARY)) { + pisequence = seq->int_sequence[i][j]; + + switch (position) { + case BEGIN_RUN : + cisequence = int_sequence[index[i]][j] + length[index[i]] - seq->length[i]; + break; + case END_RUN : + cisequence = int_sequence[index[i]][j]; + break; + } + + for (k = 0;k < seq->length[i];k++) { + *pisequence++ = *cisequence++; + } + } + + else { + prsequence = seq->real_sequence[i][j]; + + switch (position) { + case BEGIN_RUN : + crsequence = real_sequence[index[i]][j] + length[index[i]] - seq->length[i]; + break; + case END_RUN : + crsequence = real_sequence[index[i]][j]; + break; + } + + for (k = 0;k < seq->length[i];k++) { + *prsequence++ = *crsequence++; + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + + delete [] iidentifier; + delete [] ilength; + delete [] index; + } + + return seq; +} + + 
+/*--------------------------------------------------------------*/ +/** + * \brief Truncation of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] max_index_parameter highest index parameter. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::truncate(StatError &error , int max_index_parameter) const + +{ + int i , j , k; + int *ilength; + Sequences *seq; + + + error.init(); + + if ((max_index_parameter < (index_parameter ? index_parameter_distribution->offset : 1)) || + ((!index_parameter) && (max_index_parameter >= max_length))) { + seq = NULL; + error.update(SEQ_error[SEQR_MAX_INDEX_PARAMETER]); + } + + else { + ilength = new int[nb_sequence]; + + // explicit index parameters + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = length[i] - 1;j >= 0;j--) { + if (index_parameter[i][j] <= max_index_parameter) { + break; + } + } + ilength[i] = MAX(j + 1 , 1); + +# ifdef MESSAGE + if (ilength[i] == 0) { + cout << "\n" << identifier[i] << " " << j << " " << length[i] << endl; + } +# endif + + } + } + + // implicit index parameters + + else { + for (i = 0;i < nb_sequence;i++) { + ilength[i] = MIN(max_index_parameter + 1 , length[i]); + } + } + + // extraction of truncated sequences + + seq = new Sequences(nb_sequence , identifier , ilength , vertex_identifier , + index_param_type , nb_variable , type); + delete [] ilength; + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + + if (seq->index_param_type == POSITION) { + seq->index_parameter[i][seq->length[i]] = (index_interval->variance == 0. ? 
index_parameter[i][seq->length[i]] : index_parameter[i][seq->length[i] - 1]); + } + } + + seq->build_index_parameter_frequency_distribution(); + seq->index_interval_computation(); + } + + // copy of values + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->nb_variable;j++) { + if ((seq->type[j] != REAL_VALUE) && (seq->type[j] != AUXILIARY)) { + for (k = 0;k < seq->length[i];k++) { + seq->int_sequence[i][j][k] = int_sequence[i][j][k]; + } + } + + else { + for (k = 0;k < seq->length[i];k++) { + seq->real_sequence[i][j][k] = real_sequence[i][j][k]; + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + if (seq->type[i] != AUXILIARY) { + seq->build_marginal_frequency_distribution(i); + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of sub-sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] min_index_parameter lowest index parameter, + * \param[in] max_index_parameter highest index parameter. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::index_parameter_extract(StatError &error , int min_index_parameter , + int max_index_parameter) const + +{ + bool status = true; + int i , j , k; + int inb_sequence , *iidentifier , *ilength , *index , *first_index , *pvertex_id , + *cvertex_id , *pindex_param , *cindex_param , *pisequence , *cisequence; + double *prsequence , *crsequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((min_index_parameter < (index_parameter ? 
index_parameter_distribution->offset : 0)) || + ((!index_parameter) && (min_index_parameter >= max_length)) || + ((max_index_parameter != I_DEFAULT) && (min_index_parameter > max_index_parameter))) { + status = false; + error.update(SEQ_error[SEQR_MIN_INDEX_PARAMETER]); + } + if ((max_index_parameter != I_DEFAULT) && ((max_index_parameter < (index_parameter ? index_parameter_distribution->offset : 0)) || + ((!index_parameter) && (max_index_parameter >= max_length)) || (max_index_parameter < min_index_parameter))) { + status = false; + error.update(SEQ_error[SEQR_MAX_INDEX_PARAMETER]); + } + + if (status) { + + // selection of sequences + + iidentifier = new int[nb_sequence]; + ilength = new int[nb_sequence]; + index = new int[nb_sequence]; + inb_sequence = 0; + + // explicit index parameters + + if (index_parameter) { + first_index = new int[nb_sequence]; + + if (max_index_parameter == I_DEFAULT) { + for (i = 0;i < nb_sequence;i++) { + if (index_parameter[i][length[i] - 1] >= min_index_parameter) { + iidentifier[inb_sequence] = identifier[i]; + + for (j = 0;j < length[i];j++) { + if (index_parameter[i][j] >= min_index_parameter) { + break; + } + } + first_index[inb_sequence] = j; + ilength[inb_sequence] = length[i] - j; + + index[inb_sequence++] = i; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + if ((index_parameter[i][0] <= min_index_parameter) && + (index_parameter[i][length[i] - 1] >= max_index_parameter)) { + iidentifier[inb_sequence] = identifier[i]; + + for (j = 0;j < length[i];j++) { + if (index_parameter[i][j] >= min_index_parameter) { + break; + } + } + first_index[inb_sequence] = j; + ilength[inb_sequence] = length[i] - j; + + for (j = length[i] - 1;j >= 0;j--) { + if (index_parameter[i][j] <= max_index_parameter) { + break; + } + } + ilength[inb_sequence] -= (length[i] - 1 - j); + + index[inb_sequence++] = i; + } + } + } + } + + // implicit index parameters + + else { + if (max_index_parameter == I_DEFAULT) { + for (i = 0;i < 
nb_sequence;i++) { + if (length[i] > min_index_parameter) { + iidentifier[inb_sequence] = identifier[i]; + ilength[inb_sequence] = length[i] - min_index_parameter; + index[inb_sequence++] = i; + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + if (length[i] > max_index_parameter) { + iidentifier[inb_sequence] = identifier[i]; + ilength[inb_sequence] = max_index_parameter - min_index_parameter + 1; + index[inb_sequence++] = i; + } + } + } + } + + if (inb_sequence == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + // extraction of sub-sequences + + if (status) { + seq = new Sequences(inb_sequence , iidentifier , ilength , vertex_identifier , + index_param_type , nb_variable , type , false); + + // copy of vertex identifiers + + if (vertex_identifier) { + for (i = 0;i < seq->nb_sequence;i++) { + pvertex_id = seq->vertex_identifier[i]; + cvertex_id = vertex_identifier[index[i]] + first_index[i]; + for (j = 0;j < seq->length[i];j++) { + *pvertex_id++ = *cvertex_id++; + } + } + } + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + pindex_param = seq->index_parameter[i]; + cindex_param = index_parameter[index[i]] + first_index[i]; + for (j = 0;j < seq->length[i];j++) { + *pindex_param++ = *cindex_param++; + } + + if (seq->index_param_type == POSITION) { + if (max_index_parameter == I_DEFAULT) { + *pindex_param = *cindex_param; + } + else { + *pindex_param = (index_interval->variance == 0. ? 
max_index_parameter + index_interval->mean : max_index_parameter); + } + } + } + } + + // copy of values + + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->nb_variable;j++) { + if ((seq->type[j] != REAL_VALUE) && (seq->type[j] != AUXILIARY)) { + pisequence = seq->int_sequence[i][j]; + if (seq->index_parameter) { + cisequence = int_sequence[index[i]][j] + first_index[i]; + } + else { + cisequence = int_sequence[index[i]][j] + min_index_parameter; + } + + for (k = 0;k < seq->length[i];k++) { + *pisequence++ = *cisequence++; + } + } + + else { + prsequence = seq->real_sequence[i][j]; + if (seq->index_parameter) { + crsequence = real_sequence[index[i]][j] + first_index[i]; + } + else { + crsequence = real_sequence[index[i]][j] + min_index_parameter; + } + + for (k = 0;k < seq->length[i];k++) { + *prsequence++ = *crsequence++; + } + } + } + } + + if (index_parameter_distribution) { + seq->build_index_parameter_frequency_distribution(); + } + if (index_interval) { + seq->index_interval_computation(); + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + + delete [] iidentifier; + delete [] ilength; + delete [] index; + + if (index_parameter) { + delete [] first_index; + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction by segmentation of a Sequences object. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_value number of values, + * \param[in] ivalue values, + * \param[in] keep flag for keeping or rejecting the selected segments, + * \param[in] concatenation segments merged by sequence or not. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation_extract(StatError &error , int variable , + int nb_value , int *ivalue , bool keep , + bool concatenation) const + +{ + bool status = true; + int i , j , k , m , n; + int nb_present_value , nb_selected_value , nb_segment , inb_sequence , + *selected_value , *pvertex_id , *cvertex_id , *pindex_param , *cindex_param , + *pisequence , *cisequence , *segment_length , *sequence_length , **segment_begin; + variable_nature *itype; + double *prsequence , *crsequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (index_param_type == POSITION) { + error.update(SEQ_error[SEQR_INDEX_PARAMETER]); + status = false; + } + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if (!marginal_distribution[variable]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else { + nb_present_value = 0; + for (i = marginal_distribution[variable]->offset;i < marginal_distribution[variable]->nb_value;i++) { + if (marginal_distribution[variable]->frequency[i] > 0) { + nb_present_value++; + } + } + + if ((nb_value < 1) || (nb_value > (keep ? 
nb_present_value : nb_present_value - 1))) { + status = false; + error.update(SEQ_error[SEQR_NB_SELECTED_VALUE]); + } + + else { + selected_value = new int[marginal_distribution[variable]->nb_value]; + for (i = marginal_distribution[variable]->offset;i < marginal_distribution[variable]->nb_value;i++) { + selected_value[i] = false; + } + + for (i = 0;i < nb_value;i++) { + if ((ivalue[i] < marginal_distribution[variable]->offset) || + (ivalue[i] >= marginal_distribution[variable]->nb_value) || + (marginal_distribution[variable]->frequency[ivalue[i]] == 0)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_label[STATL_VALUE] << " " << ivalue[i] << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else if (selected_value[ivalue[i]]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VALUE] << " " << ivalue[i] << " " + << STAT_error[STATR_ALREADY_SELECTED]; + error.update((error_message.str()).c_str()); + } + else { + selected_value[ivalue[i]] = true; + } + } + + delete [] selected_value; + } + } + } + + if ((variable + 1 < nb_variable) && ((type[variable + 1] != INT_VALUE) && + (type[variable + 1] != STATE) && (type[variable + 1] != REAL_VALUE))) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (status) { + itype = new variable_nature[nb_variable - 1]; + for (i = 0;i < variable;i++) { + itype[i] = type[i]; + } + for (i = variable + 1;i < nb_variable;i++) { + itype[i - 1] = type[i]; + } + + if (keep) { + nb_selected_value 
= nb_value; + selected_value = ivalue; + } + + else { + nb_selected_value = nb_present_value - nb_value; + selected_value = new int[nb_selected_value]; + i = 0; + + for (j = marginal_distribution[variable]->offset;j < marginal_distribution[variable]->nb_value;j++) { + if (marginal_distribution[variable]->frequency[j] > 0) { + for (k = 0;k < nb_value;k++) { + if (ivalue[k] == j) { + break; + } + } + + if (k == nb_value) { + selected_value[i++] = j; + } + } + } + } + + // initializations + + segment_length = new int[cumul_length]; + + segment_begin = new int*[cumul_length]; + for (i = 0;i < cumul_length;i++) { + segment_begin[i] = 0; + } + + // search for the sub-sequences to be extracted + + i = -1; + + for (j = 0;j < nb_sequence;j++) { + pisequence = int_sequence[j][variable]; + + for (k = 0;k < nb_selected_value;k++) { + if (*pisequence == selected_value[k]) { + break; + } + } + + if (k < nb_selected_value) { + i++; + segment_begin[i] = new int[2]; + + segment_begin[i][0] = j; + segment_begin[i][1] = 0; + segment_length[i] = 1; + } + + for (k = 1;k < length[j];k++) { + pisequence++; + + for (m = 0;m < nb_selected_value;m++) { + if (*pisequence == selected_value[m]) { + break; + } + } + + if (m < nb_selected_value) { + if (*pisequence != *(pisequence - 1)) { + i++; + segment_begin[i] = new int[2]; + + segment_begin[i][0] = j; + segment_begin[i][1] = k; + segment_length[i] = 1; + } + else { + segment_length[i]++; + } + } + } + } + + nb_segment = i + 1; + + if (concatenation) { + sequence_length = new int[nb_sequence]; + + i = 0; + sequence_length[i] = segment_length[i]; + for (j = 1;j < nb_segment;j++) { + if (segment_begin[j][0] != segment_begin[j - 1][0]) { + i++; + sequence_length[i] = 0; + } + sequence_length[i] += segment_length[j]; + } + + inb_sequence = i + 1; + } + + else { + inb_sequence = nb_segment; + sequence_length = segment_length; + } + + // construction of the Sequences object + + seq = new Sequences(inb_sequence , NULL , sequence_length , 
vertex_identifier , + index_param_type , nb_variable - 1 , itype , false); + + // copy of vertex identifiers + + if (vertex_identifier) { + i = -1; + + for (j = 0;j < nb_segment;j++) { + if (concatenation) { + if ((j == 0) || (segment_begin[j][0] != segment_begin[j - 1][0])) { + pvertex_id = seq->vertex_identifier[++i]; + } + } + else { + pvertex_id = seq->vertex_identifier[j]; + } + + cvertex_id = vertex_identifier[segment_begin[j][0]] + segment_begin[j][1]; + for (k = 0;k < segment_length[j];k++) { + *pvertex_id++ = *cvertex_id++; + } + } + } + + // copy of index parameters + + if (index_parameter) { + i = -1; + + for (j = 0;j < nb_segment;j++) { + if (concatenation) { + if ((j == 0) || (segment_begin[j][0] != segment_begin[j - 1][0])) { + pindex_param = seq->index_parameter[++i]; + } + } + else { + pindex_param = seq->index_parameter[j]; + } + + cindex_param = index_parameter[segment_begin[j][0]] + segment_begin[j][1]; + for (k = 0;k < segment_length[j];k++) { + *pindex_param++ = *cindex_param++; + } + } + } + + if (index_parameter_distribution) { + seq->build_index_parameter_frequency_distribution(); + } + if (index_interval) { + seq->index_interval_computation(); + } + + // copy of values + + i = -1; + for (j = 0;j < nb_segment;j++) { + k = 0; + for (m = 0;m < nb_variable;m++) { + if (m != variable) { + if ((type[m] != REAL_VALUE) && (type[m] != AUXILIARY)) { + if (concatenation) { + if ((j == 0) || (segment_begin[j][0] != segment_begin[j - 1][0])) { + pisequence = seq->int_sequence[++i][k]; + } + } + else { + pisequence = seq->int_sequence[j][k]; + } + + cisequence = int_sequence[segment_begin[j][0]][m] + segment_begin[j][1]; + for (n = 0;n < segment_length[j];n++) { + *pisequence++ = *cisequence++; + } + } + + else { + if (concatenation) { + if ((j == 0) || (segment_begin[j][0] != segment_begin[j - 1][0])) { + prsequence = seq->real_sequence[++i][k]; + } + } + else { + prsequence = seq->real_sequence[j][k]; + } + + crsequence = 
real_sequence[segment_begin[j][0]][m] + segment_begin[j][1]; + for (n = 0;n < segment_length[j];n++) { + *prsequence++ = *crsequence++; + } + } + + k++; + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + + if (!keep) { + delete [] selected_value; + } + + delete [] segment_length; + + for (i = 0;i < nb_segment;i++) { + delete [] segment_begin[i]; + } + delete [] segment_begin; + + delete [] itype; + + if (concatenation) { + delete [] sequence_length; + } + +# ifdef MESSAGE + if ((seq->index_param_type == TIME) && (seq->index_interval->variance > 0.)) { // for the mango growth follow-ups + double average_diff , individual_mean , global_mean , diff , individual_variance , global_variance; + + for (i = 0;i < seq->nb_variable;i++) { + if (seq->type[i] == INT_VALUE) { + average_diff = 0.; + for (j = 0;j < seq->nb_sequence;j++) { + average_diff += seq->int_sequence[j][i][seq->length[j] - 1] - seq->int_sequence[j][i][0]; + } + average_diff /= seq->nb_sequence; + + global_mean = 0.; + individual_variance = 0.; + for (j = 0;j < seq->nb_sequence;j++) { + individual_mean = 0.; + for (k = 0;k < seq->length[j];k++) { + individual_mean += seq->int_sequence[j][i][k]; + } + global_mean += individual_mean; + individual_mean /= seq->length[j]; + + for (k = 0;k < seq->length[j];k++) { + diff = seq->int_sequence[j][i][k] - individual_mean; + individual_variance += diff * diff; + } + } + global_mean /= seq->cumul_length; + individual_variance /= (seq->cumul_length - 1); + + global_variance = 0.; + for (j = 0;j < seq->nb_sequence;j++) { + for (k = 0;k < seq->length[j];k++) { + diff = seq->int_sequence[j][i][k] - global_mean; + global_variance += diff * diff; + } + } + global_variance /= (seq->cumul_length - 1); + + cout << "\n" << STAT_label[STATL_VARIABLE] << " " << i + 1 << " - " + << "average difference: " << average_diff << " | " + << "within-individual 
variance: " << individual_variance << " | " + << "global variance: " << global_variance << " | " + << individual_variance / global_variance; + } + } + cout << endl; + } +# endif + + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction by segmentation of a Sequences object. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] nb_value number of values, + * \param[in] ivalue values, + * \param[in] keep flag for keeping or rejecting the selected segments, + * \param[in] concatenation segments merged by sequence or not. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::segmentation_extract(StatError &error , int variable , + int nb_value , vector &ivalue , bool keep , + bool concatenation) const + +{ + return segmentation_extract(error , variable , nb_value , ivalue.data() , keep , concatenation); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Summation of successive values along sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cumulate(StatError &error , int variable) const + +{ + bool status = true; + int i , j , k , m; + int offset , inb_variable; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (type[0] == STATE) { + offset = 1; + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + } + else { + offset = 0; + } + + if (variable != I_DEFAULT) { + if ((variable < offset + 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + else { + variable--; + } + } + + for (i = offset;i < nb_variable;i++) { + if (((variable == I_DEFAULT) || (variable == i)) && (type[i] != INT_VALUE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (status) { + if (variable == I_DEFAULT) { + inb_variable = nb_variable; + itype = type; + } + + else { + inb_variable = offset + 1; + itype = new variable_nature[inb_variable]; + if (type[0] == STATE) { + itype[0] = type[0]; + } + itype[offset] = type[variable]; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , inb_variable , itype); + + if (variable != I_DEFAULT) { + delete [] itype; + } + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 
0;j < (seq->index_param_type == POSITION ? seq->length[i] + 1 : seq->length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + // copy of the state variable + + if (type[0] == STATE) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->int_sequence[i][0][j] = int_sequence[i][0][j]; + } + } + + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + // summation of values + + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + if (type[k] != REAL_VALUE) { + seq->int_sequence[i][j][0] = int_sequence[i][k][0]; + for (m = 1;m < length[i];m++) { + seq->int_sequence[i][j][m] = seq->int_sequence[i][j][m - 1] + int_sequence[i][k][m]; + } + } + + else { + seq->real_sequence[i][j][0] = real_sequence[i][k][0]; + for (m = 1;m < length[i];m++) { + seq->real_sequence[i][j][m] = seq->real_sequence[i][j][m - 1] + real_sequence[i][k][m]; + } + } + + j++; + } + } + } + + for (i = offset;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief First-order differencing of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] first_element 1st element of the sequence kept or not. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::difference(StatError &error , int variable , bool first_element) const + +{ + bool status = true; + int i , j , k , m; + int offset , inb_variable , *ilength , *pvertex_id , *cvertex_id , *pindex_param , *cindex_param , + *pisequence , *cisequence; + variable_nature *itype; + double *prsequence , *crsequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (index_param_type == POSITION) { + status = increasing_index_parameter_checking(error , true , SEQ_label[SEQL_SEQUENCE]); + } + + if (type[0] == STATE) { + offset = 1; + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + } + else { + offset = 0; + } + + if (variable != I_DEFAULT) { + if ((variable < offset + 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + else { + variable--; + } + } + + for (i = offset;i < nb_variable;i++) { + if (((variable == I_DEFAULT) || (variable == i)) && (type[i] != INT_VALUE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if ((!first_element) && (length_distribution->offset < 2)) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_GREATER_THAN] << " " << 1; + error.correction_update(SEQ_error[SEQR_MIN_SEQUENCE_LENGTH] , (correction_message.str()).c_str()); + } + + if (status) { + if (first_element) { + ilength = length; + } + else { + ilength = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + ilength[i] = length[i] - 1; + } + } + + if (variable == I_DEFAULT) { + 
inb_variable = nb_variable; + } + else { + inb_variable = offset + 1; + } + + if ((index_param_type != IMPLICIT_TYPE) && ((index_interval->mean != 1.) || + (index_interval->variance > 0.))) { + itype = new variable_nature[inb_variable]; + if (type[0] == STATE) { + itype[0] = type[0]; + } + for (i = offset;i < inb_variable;i++) { + itype[i] = REAL_VALUE; + } + } + + else { + if (variable == I_DEFAULT) { + itype = type; + } + else { + itype = new variable_nature[inb_variable]; + if (type[0] == STATE) { + itype[0] = type[0]; + } + itype[offset] = type[variable]; + } + } + + seq = new Sequences(nb_sequence , identifier , ilength , vertex_identifier , + index_param_type , inb_variable , itype , false); + + if (!first_element) { + delete [] ilength; + } + if (((index_param_type != IMPLICIT_TYPE) && ((index_interval->mean != 1.) || + (index_interval->variance > 0.))) || (variable != I_DEFAULT)) { + delete [] itype; + } + + // copy of vertex identifiers + + if (vertex_identifier) { + for (i = 0;i < seq->nb_sequence;i++) { + pvertex_id = seq->vertex_identifier[i]; + if (first_element) { + cvertex_id = vertex_identifier[i]; + } + else { + cvertex_id = vertex_identifier[i] + 1; + } + + for (j = 0;j < seq->length[i];j++) { + *pvertex_id++ = *cvertex_id++; + } + } + } + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + pindex_param = seq->index_parameter[i]; + if (first_element) { + cindex_param = index_parameter[i]; + } + else { + cindex_param = index_parameter[i] + 1; + } + + for (j = 0;j < (seq->index_param_type == POSITION ? 
seq->length[i] + 1 : seq->length[i]);j++) { + *pindex_param++ = *cindex_param++; + } + } + } + + if (first_element) { + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + } + + else { + seq->build_index_parameter_frequency_distribution(); + if (index_interval) { + seq->index_interval_computation(); + } + } + + // copy of the state variable + + if (type[0] == STATE) { + for (i = 0;i < seq->nb_sequence;i++) { + pisequence = seq->int_sequence[i][0]; + if (first_element) { + cisequence = int_sequence[i][0]; + } + else { + cisequence = int_sequence[i][0] + 1; + } + for (j = 0;j < seq->length[i];j++) { + *pisequence++ = *cisequence++; + } + } + + if (first_element) { + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + else { + seq->min_value_computation(0); + seq->max_value_computation(0); + + seq->build_marginal_frequency_distribution(0); + } + } + + // differencing of sequences + + if ((index_param_type != IMPLICIT_TYPE) && ((index_interval->mean != 1.) 
|| + (index_interval->variance > 0.))) { + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + prsequence = seq->real_sequence[i][j]; + cindex_param = index_parameter[i]; + + if (type[k] != REAL_VALUE) { + cisequence = int_sequence[i][k]; + + if (first_element) { + if (*cindex_param > 0) { + *prsequence++ = (double)*cisequence / (double)*cindex_param; + } + else { + *prsequence++ = D_DEFAULT; + } + } + + for (m = 0;m < length[i] - 1;m++) { + *prsequence++ = (double)(*(cisequence + 1) - *cisequence) / + (double)(*(cindex_param + 1) - *cindex_param); + cindex_param++; + cisequence++; + } + } + + else { + crsequence = real_sequence[i][k]; + + if (first_element) { + if (*cindex_param > 0) { + *prsequence++ = *crsequence / *cindex_param; + } + else { + *prsequence++ = D_DEFAULT; + } + } + + for (m = 0;m < length[i] - 1;m++) { + *prsequence++ = (*(crsequence + 1) - *crsequence) / + (*(cindex_param + 1) - *cindex_param); + cindex_param++; + crsequence++; + } + } + + j++; + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + if (type[k] != REAL_VALUE) { + pisequence = seq->int_sequence[i][j]; + cisequence = int_sequence[i][k]; + + if (first_element) { + *pisequence++ = *cisequence; + } + for (m = 0;m < length[i] - 1;m++) { + *pisequence++ = *(cisequence + 1) - *cisequence; + cisequence++; + } + } + + else { + prsequence = seq->real_sequence[i][j]; + crsequence = real_sequence[i][k]; + + if (first_element) { + *prsequence++ = *crsequence; + } + for (m = 0;m < length[i] - 1;m++) { + *prsequence++ = *(crsequence + 1) - *crsequence; + crsequence++; + } + } + + j++; + } + } + } + } + + for (i = offset;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + } + + return seq; +} + 
+ +/*--------------------------------------------------------------*/ +/** + * \brief Log-transform of values. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] base base of the logarithm. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::log_transform(StatError &error , int variable , log_base base) const + +{ + bool status = true; + int i , j , k , m; + int offset , inb_variable; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (type[0] == STATE) { + offset = 1; + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + } + else { + offset = 0; + } + + if (variable != I_DEFAULT) { + if ((variable < offset + 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + else { + variable--; + } + } + + for (i = offset;i < nb_variable;i++) { + if ((variable == I_DEFAULT) || (variable == i)) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else if (min_value[i] <= 0.) 
{ + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + if (variable == I_DEFAULT) { + inb_variable = nb_variable; + } + else { + inb_variable = offset + 1; + } + + itype = new variable_nature[inb_variable]; + if (type[0] == STATE) { + itype[0] = type[0]; + } + for (i = offset;i < inb_variable;i++) { + itype[i] = REAL_VALUE; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , inb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < (seq->index_param_type == POSITION ? 
seq->length[i] + 1 : seq->length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + // copy of the state variable + + if (type[0] == STATE) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->int_sequence[i][0][j] = int_sequence[i][0][j]; + } + } + + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + // log-transform of values + + switch (base) { + + case NATURAL : { + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + if (type[k] != REAL_VALUE) { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = log(int_sequence[i][k][m]); + } + } + + else { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = log(real_sequence[i][k][m]); + } + } + + j++; + } + } + } + break; + } + + case TWO : { + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + if (type[k] != REAL_VALUE) { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = log2(int_sequence[i][k][m]); + } + } + + else { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = log2(real_sequence[i][k][m]); + } + } + + j++; + } + } + } + break; + } + + case TEN : { + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + if (type[k] != REAL_VALUE) { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = log10(int_sequence[i][k][m]); + } + } + + else { + for (m = 0;m < length[i];m++) { + seq->real_sequence[i][j][m] = log10(real_sequence[i][k][m]); + } + } + + j++; + } + } + } + break; + } + } + + for (i = offset;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + 
seq->build_marginal_histogram(i); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of relative growth rates on the basis of cumulative dimensions. + * + * \param[in] error reference on a StatError object, + * \param[in] growth_factor growth factor for computing the first relative growth rate. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::relative_growth_rate(StatError &error , double growth_factor) const + +{ + bool status = true , begin; + int i , j , k; + int offset; + variable_nature *itype; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (index_param_type == POSITION) { + status = increasing_index_parameter_checking(error , true , SEQ_label[SEQL_SEQUENCE]); + } + + if (type[0] == STATE) { + offset = 1; + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + } + else { + offset = 0; + } + + for (i = offset;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + +/* else { + lstatus = increasing_sequence_checking(error , i , false , SEQ_label[SEQL_SEQUENCE] , + STAT_label[STATL_VALUE]); + if (!lstatus) { + status = false; + } + } */ + + else if (min_value[i] < 0.) 
{ + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + } + + if (status) { + itype = new variable_nature[nb_variable]; + if (type[0] == STATE) { + itype[0] = type[0]; + } + for (i = offset;i < nb_variable;i++) { + itype[i] = REAL_VALUE; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , nb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < (seq->index_param_type == POSITION ? seq->length[i] + 1 : seq->length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + // copy of the state variable + + if (type[0] == STATE) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->int_sequence[i][0][j] = int_sequence[i][0][j]; + } + } + + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + // computaton of relative growth rates + + if ((index_param_type != IMPLICIT_TYPE) && ((index_interval->mean != 1.) 
|| + (index_interval->variance > 0.))) { + for (i = 0;i < nb_sequence;i++) { + for (j = offset;j < nb_variable;j++) { + seq->real_sequence[i][j][0] = 0.; + begin = true; + + if (type[j] != REAL_VALUE) { + for (k = 1;k < length[i];k++) { + if ((int_sequence[i][j][k] > 0) && (int_sequence[i][j][k - 1] > 0)) { + seq->real_sequence[i][j][k] = (log(int_sequence[i][j][k]) - log(int_sequence[i][j][k - 1])) / + (index_parameter[i][k] - index_parameter[i][k - 1]); + + if (begin) { + begin = false; + seq->real_sequence[i][j][k - 1] = seq->real_sequence[i][j][k] * growth_factor; + } + } + + else { + seq->real_sequence[i][j][k] = 0.; + } + } + } + + else { + for (k = 1;k < length[i];k++) { + if ((real_sequence[i][j][k] > 0.) && (real_sequence[i][j][k - 1] > 0.)) { + seq->real_sequence[i][j][k] = (log(real_sequence[i][j][k]) - log(real_sequence[i][j][k - 1])) / + (index_parameter[i][k] - index_parameter[i][k - 1]); + + if (begin) { + begin = false; + seq->real_sequence[i][j][k - 1] = seq->real_sequence[i][j][k] * growth_factor; + } + } + + else { + seq->real_sequence[i][j][k] = 0.; + } + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = offset;j < nb_variable;j++) { + seq->real_sequence[i][j][0] = 0.; + begin = true; + + if (type[j] != REAL_VALUE) { + for (k = 1;k < length[i];k++) { + if ((int_sequence[i][j][k] > 0) && (int_sequence[i][j][k - 1] > 0)) { + seq->real_sequence[i][j][k] = log(int_sequence[i][j][k]) - log(int_sequence[i][j][k - 1]); + + if (begin) { + begin = false; + seq->real_sequence[i][j][k - 1] = seq->real_sequence[i][j][k] * growth_factor; + } + } + + else { + seq->real_sequence[i][j][k] = 0.; + } + } + } + + else { + for (k = 1;k < length[i];k++) { + if ((real_sequence[i][j][k] > 0.) 
&& (real_sequence[i][j][k - 1] > 0.)) { + seq->real_sequence[i][j][k] = log(real_sequence[i][j][k]) - log(real_sequence[i][j][k - 1]); + + if (begin) { + begin = false; + seq->real_sequence[i][j][k - 1] = seq->real_sequence[i][j][k] * growth_factor; + } + } + + else { + seq->real_sequence[i][j][k] = 0.; + } + } + } + } + } + } + + for (i = offset;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_histogram(i); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Normalization of sequences (for mangoo GU growth profiles). + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::sequence_normalization(StatError &error , int variable) const + +{ + bool status = true; + int i , j , k; + int offset , int_max; + variable_nature *itype; + double real_max; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (type[0] == STATE) { + offset = 1; + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + } + else { + offset = 0; + } + + if (variable != I_DEFAULT) { + if ((variable < offset + 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + else { + variable--; + } + } + + for (i = offset;i < nb_variable;i++) { + if ((variable == I_DEFAULT) || (variable == i)) { + if ((type[i] != INT_VALUE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , 
(correction_message.str()).c_str()); + } + + if (min_value[i] < 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_POSITIVE_MIN_VALUE]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + itype = new variable_nature[nb_variable]; + + if (type[0] == STATE) { + itype[0] = type[0]; + } + + if (variable == I_DEFAULT) { + for (i = offset;i < nb_variable;i++) { + itype[i] = REAL_VALUE; + } + } + else { + for (i = offset;i < nb_variable;i++) { + itype[i] = type[i]; + } + itype[variable] = REAL_VALUE; + } + + seq = new Sequences(nb_sequence , identifier , length , vertex_identifier , + index_param_type , nb_variable , itype); + delete [] itype; + + // copy of index parameters + + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < (seq->index_param_type == POSITION ? 
seq->length[i] + 1 : seq->length[i]);j++) { + seq->index_parameter[i][j] = index_parameter[i][j]; + } + } + } + + // copy of the state variable + + if (type[0] == STATE) { + for (i = 0;i < seq->nb_sequence;i++) { + for (j = 0;j < seq->length[i];j++) { + seq->int_sequence[i][0][j] = int_sequence[i][0][j]; + } + } + + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + // normalization of sequences + + for (i = 0;i < nb_sequence;i++) { + for (j = offset;j < nb_variable;j++) { + if ((variable == I_DEFAULT) || (variable == j)) { + if (type[j] != REAL_VALUE) { + int_max = int_sequence[i][j][0]; + for (k = 1;k < length[i];k++) { + if (int_sequence[i][j][k] > int_max) { + int_max = int_sequence[i][j][k]; + } + } + + for (k = 0;k < length[i];k++) { + seq->real_sequence[i][j][k] = (double)int_sequence[i][j][k] / (double)int_max; + } + } + + else { + real_max = real_sequence[i][j][0]; + for (k = 1;k < length[i];k++) { + if (real_sequence[i][j][k] > real_max) { + real_max = real_sequence[i][j][k]; + } + } + + for (k = 0;k < length[i];k++) { + seq->real_sequence[i][j][k] = real_sequence[i][j][k] / real_max; + } + } + } + } + } + + for (i = offset;i < seq->nb_variable;i++) { + if ((variable == I_DEFAULT) || (variable == i)) { + seq->min_value_computation(i); + seq->max_value[i] = 1.; + + seq->build_marginal_histogram(i); + } + + else { + seq->min_value[i] = min_value[i]; + seq->max_value[i] = max_value[i]; + + if (marginal_distribution[i]) { + seq->marginal_distribution[i] = new FrequencyDistribution(*marginal_distribution[i]); + } + if (marginal_histogram[i]) { + seq->marginal_histogram[i] = new Histogram(*marginal_histogram[i]); + } + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Filtering of sequences using a symmetric smoothing filter. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] nb_point filter half width, + * \param[in] filter filter, + * \param[in] variable variable index, + * \param[in] begin_end begin and end kept or not, + * \param[in] segmentation smoothing by segment or not using the state variable, + * \param[in] output trend, substraction residuals or division residuals. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::moving_average(StatError &error , int nb_point , double *filter , + int variable , bool begin_end , bool segmentation , + sequence_type output) const + +{ + bool status = true; + int i , j , k , m , n , p; + int offset , inb_variable , nb_segment , *ilength , *pvertex_id , *cvertex_id , *pindex_param , *cindex_param , + *pisequence , *cisequence , *change_point; + variable_nature *itype; + double *prsequence , *crsequence , *pfilter; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((index_interval) && (index_interval->variance > 0.)) { + status = false; + error.update(SEQ_error[SEQR_UNEQUAL_INDEX_INTERVALS]); + } + + if (type[0] == STATE) { + offset = 1; + if (nb_variable == 1) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE]); + } + } + else { + offset = 0; + } + + if (variable != I_DEFAULT) { + if ((variable < offset + 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + else { + variable--; + } + } + + for (i = offset;i < nb_variable;i++) { + if (((variable == I_DEFAULT) || (variable == i)) && (type[i] != INT_VALUE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , 
(correction_message.str()).c_str()); + } + } + + if ((!begin_end) && (length_distribution->offset < 2 * nb_point + 1)) { + status = false; + ostringstream correction_message; + correction_message << STAT_error[STATR_GREATER_THAN] << " " << 2 * nb_point; + error.correction_update(SEQ_error[SEQR_MIN_SEQUENCE_LENGTH] , (correction_message.str()).c_str()); + } + + if (status) { + if (begin_end) { + ilength = length; + } + else { + ilength = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + ilength[i] = length[i] - 2 * nb_point; + } + } + + if (variable == I_DEFAULT) { + inb_variable = nb_variable - offset; + } + else { + inb_variable = 1; + } + + if (output == SEQUENCE) { + inb_variable *= 2; + } + inb_variable += offset; + + itype = new variable_nature[inb_variable]; + + if (type[0] == STATE) { + itype[0] = type[0]; + } + + if (output == SEQUENCE) { + i = offset; + for (j = offset;j < nb_variable;j++) { + if ((variable == I_DEFAULT) || (variable == j)) { + itype[i++] = type[j]; + itype[i++] = AUXILIARY; + } + } + } + + else { + for (i = offset;i < inb_variable;i++) { + itype[i] = REAL_VALUE; + } + } + + seq = new Sequences(nb_sequence , identifier , ilength , vertex_identifier , + index_param_type , inb_variable , itype , false); + + if (!begin_end) { + delete [] ilength; + } + delete [] itype; + + // copy of vertex identifiers + + if (vertex_identifier) { + for (i = 0;i < seq->nb_sequence;i++) { + pvertex_id = seq->vertex_identifier[i]; + if (begin_end) { + cvertex_id = vertex_identifier[i]; + } + else { + cvertex_id = vertex_identifier[i] + nb_point; + } + + for (j = 0;j < seq->length[i];j++) { + *pvertex_id++ = *cvertex_id++; + } + } + } + + // copy of index parameters + + if (index_parameter) { + for (i = 0;i < seq->nb_sequence;i++) { + pindex_param = seq->index_parameter[i]; + if (begin_end) { + cindex_param = index_parameter[i]; + } + else { + cindex_param = index_parameter[i] + nb_point; + } + + for (j = 0;j < (seq->index_param_type == POSITION ? 
seq->length[i] + 1 : seq->length[i]);j++) { + *pindex_param++ = *cindex_param++; + } + } + } + + if (begin_end) { + if (index_parameter_distribution) { + seq->index_parameter_distribution = new FrequencyDistribution(*index_parameter_distribution); + } + if (index_interval) { + seq->index_interval = new FrequencyDistribution(*index_interval); + } + } + + else { + seq->build_index_parameter_frequency_distribution(); + if (index_interval) { + seq->index_interval_computation(); + } + } + + // copy of the state variable + + if (type[0] == STATE) { + for (i = 0;i < seq->nb_sequence;i++) { + pisequence = seq->int_sequence[i][0]; + if (begin_end) { + cisequence = int_sequence[i][0]; + } + else { + cisequence = int_sequence[i][0] + nb_point; + } + + for (j = 0;j < seq->length[i];j++) { + *pisequence++ = *cisequence++; + } + } + + if (begin_end) { + seq->min_value[0] = min_value[0]; + seq->max_value[0] = max_value[0]; + seq->marginal_distribution[0] = new FrequencyDistribution(*marginal_distribution[0]); + } + + else { + seq->min_value_computation(0); + seq->max_value_computation(0); + seq->build_marginal_frequency_distribution(0); + } + } + + // filtering using a symmetric smoothing filter + + if ((type[0] == STATE) && (begin_end) && (segmentation)) { + change_point = new int[max_length]; + + for (i = 0;i < nb_sequence;i++) { + change_point[0] = 0; + j = 1; + for (k = 1;k < length[i];k++) { + if (int_sequence[i][0][k] != int_sequence[i][0][k - 1]) { + change_point[j++] = k; + } + } + change_point[j] = length[i]; + nb_segment = j; + + j = 1; + for (k = 1;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + prsequence = seq->real_sequence[i][output == SEQUENCE ? 
j + 1 : j]; + + switch (type[k]) { + + case INT_VALUE : { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < MIN(change_point[m] + nb_point , change_point[m + 1]);n++) { + cisequence = int_sequence[i][k] + change_point[m]; + pfilter = filter; + *prsequence = 0.; + for (p = 0;p < 2 * nb_point + 1;p++) { + *prsequence += *cisequence * *pfilter++; + if ((n - nb_point + p >= change_point[m]) && (n - nb_point + p < change_point[m + 1] - 1)) { + cisequence++; + } + } + prsequence++; + } + + for (n = change_point[m] + nb_point;n < change_point[m + 1] - nb_point;n++) { + cisequence = int_sequence[i][k] + n - nb_point; + pfilter = filter; + *prsequence = 0.; + for (p = 0;p < 2 * nb_point + 1;p++) { + *prsequence += *cisequence++ * *pfilter++; + } + prsequence++; + } + + for (n = MAX(change_point[m + 1] - nb_point , change_point[m] + nb_point);n < change_point[m + 1];n++) { + cisequence = int_sequence[i][k] + n - nb_point; + pfilter = filter; + *prsequence = 0.; + for (p = 0;p < 2 * nb_point + 1;p++) { + *prsequence += *cisequence * *pfilter++; + if (n - nb_point + p < change_point[m + 1] - 1) { + cisequence++; + } + } + prsequence++; + } + } + + cisequence = int_sequence[i][k]; + + switch (output) { + + case SEQUENCE : { + pisequence = seq->int_sequence[i][j]; + for (m = 0;m < seq->length[i];m++) { + *pisequence++ = *cisequence++; + } + break; + } + + case SUBTRACTION_RESIDUAL : { + prsequence = seq->real_sequence[i][j]; + for (m = 0;m < seq->length[i];m++) { + *prsequence = *cisequence++ - *prsequence; + prsequence++; + } + break; + } + + case DIVISION_RESIDUAL : { + prsequence = seq->real_sequence[i][j]; + for (m = 0;m < seq->length[i];m++) { + if (*prsequence != 0.) 
{ + *prsequence = *cisequence / *prsequence; + } + prsequence++; + cisequence++; + } + break; + } + } + break; + } + + case REAL_VALUE : { + for (m = 0;m < nb_segment;m++) { + for (n = change_point[m];n < MIN(change_point[m] + nb_point , change_point[m + 1]);n++) { + crsequence = real_sequence[i][k] + change_point[m]; + pfilter = filter; + *prsequence = 0.; + for (p = 0;p < 2 * nb_point + 1;p++) { + *prsequence += *crsequence * *pfilter++; + if ((n - nb_point + p >= change_point[m]) && (n - nb_point + p < change_point[m + 1] - 1)) { + crsequence++; + } + } + prsequence++; + } + + for (n = change_point[m] + nb_point;n < change_point[m + 1] - nb_point;n++) { + crsequence = real_sequence[i][k] + n - nb_point; + pfilter = filter; + *prsequence = 0.; + for (p = 0;p < 2 * nb_point + 1;p++) { + *prsequence += *crsequence++ * *pfilter++; + } + prsequence++; + } + + for (n = MAX(change_point[m + 1] - nb_point , change_point[m] + nb_point);n < change_point[m + 1];n++) { + crsequence = real_sequence[i][k] + n - nb_point; + pfilter = filter; + *prsequence = 0.; + for (p = 0;p < 2 * nb_point + 1;p++) { + *prsequence += *crsequence * *pfilter++; + if (n - nb_point + p < change_point[m + 1] - 1) { + crsequence++; + } + } + prsequence++; + } + } + + prsequence = seq->real_sequence[i][j]; + crsequence = real_sequence[i][k]; + + switch (output) { + + case SEQUENCE : { + for (m = 0;m < seq->length[i];m++) { + *prsequence++ = *crsequence++; + } + break; + } + + case SUBTRACTION_RESIDUAL : { + for (m = 0;m < seq->length[i];m++) { + *prsequence = *crsequence++ - *prsequence; + prsequence++; + } + break; + } + + case DIVISION_RESIDUAL : { + for (m = 0;m < seq->length[i];m++) { + if (*prsequence != 0.) 
{ + *prsequence = *crsequence / *prsequence; + } + prsequence++; + crsequence++; + } + break; + } + } + break; + } + } + + if (output == SEQUENCE) { + j++; + } + j++; + } + } + } + + delete [] change_point; + } + + else { + for (i = 0;i < nb_sequence;i++) { + j = offset; + for (k = offset;k < nb_variable;k++) { + if ((variable == I_DEFAULT) || (variable == k)) { + prsequence = seq->real_sequence[i][output == SEQUENCE ? j + 1 : j]; + + switch (type[k]) { + + case INT_VALUE : { + if (begin_end) { + for (m = 0;m < MIN(nb_point , length[i]);m++) { + cisequence = int_sequence[i][k]; + pfilter = filter; + *prsequence = 0.; + for (n = 0;n < 2 * nb_point + 1;n++) { + *prsequence += *cisequence * *pfilter++; + if ((m - nb_point + n >= 0) && (m - nb_point + n < length[i] - 1)) { + cisequence++; + } + } + prsequence++; + } + } + + for (m = nb_point;m < length[i] - nb_point;m++) { + cisequence = int_sequence[i][k] + m - nb_point; + pfilter = filter; + *prsequence = 0.; + for (n = 0;n < 2 * nb_point + 1;n++) { + *prsequence += *cisequence++ * *pfilter++; + } + prsequence++; + } + + if (begin_end) { + for (m = MAX(length[i] - nb_point , nb_point);m < length[i];m++) { + cisequence = int_sequence[i][k] + m - nb_point; + pfilter = filter; + *prsequence = 0.; + for (n = 0;n < 2 * nb_point + 1;n++) { + *prsequence += *cisequence * *pfilter++; + if (m - nb_point + n < length[i] - 1) { + cisequence++; + } + } + prsequence++; + } + } + + if (begin_end) { + cisequence = int_sequence[i][k]; + } + else { + cisequence = int_sequence[i][k] + nb_point; + } + + switch (output) { + + case SEQUENCE : { + pisequence = seq->int_sequence[i][j]; + for (m = 0;m < seq->length[i];m++) { + *pisequence++ = *cisequence++; + } + break; + } + + case SUBTRACTION_RESIDUAL : { + prsequence = seq->real_sequence[i][j]; + for (m = 0;m < seq->length[i];m++) { + *prsequence = *cisequence++ - *prsequence; + prsequence++; + } + break; + } + + case DIVISION_RESIDUAL : { + prsequence = seq->real_sequence[i][j]; + for 
(m = 0;m < seq->length[i];m++) { + if (*prsequence != 0.) { + *prsequence = *cisequence / *prsequence; + } + prsequence++; + cisequence++; + } + break; + } + } + break; + } + + case REAL_VALUE : { + if (begin_end) { + for (m = 0;m < MIN(nb_point , length[i]);m++) { + crsequence = real_sequence[i][k]; + pfilter = filter; + *prsequence = 0.; + for (n = 0;n < 2 * nb_point + 1;n++) { + *prsequence += *crsequence * *pfilter++; + if ((m - nb_point + n >= 0) && (m - nb_point + n < length[i] - 1)) { + crsequence++; + } + } + prsequence++; + } + } + + for (m = nb_point;m < length[i] - nb_point;m++) { + crsequence = real_sequence[i][k] + m - nb_point; + pfilter = filter; + *prsequence = 0.; + for (n = 0;n < 2 * nb_point + 1;n++) { + *prsequence += *crsequence++ * *pfilter++; + } + prsequence++; + } + + if (begin_end) { + for (m = MAX(length[i] - nb_point , nb_point);m < length[i];m++) { + crsequence = real_sequence[i][k] + m - nb_point; + pfilter = filter; + *prsequence = 0.; + for (n = 0;n < 2 * nb_point + 1;n++) { + *prsequence += *crsequence * *pfilter++; + if (m - nb_point + n < length[i] - 1) { + crsequence++; + } + } + prsequence++; + } + } + + prsequence = seq->real_sequence[i][j]; + if (begin_end) { + crsequence = real_sequence[i][k]; + } + else { + crsequence = real_sequence[i][k] + nb_point; + } + + switch (output) { + + case SEQUENCE : { + for (m = 0;m < seq->length[i];m++) { + *prsequence++ = *crsequence++; + } + break; + } + + case SUBTRACTION_RESIDUAL : { + for (m = 0;m < seq->length[i];m++) { + *prsequence = *crsequence++ - *prsequence; + prsequence++; + } + break; + } + + case DIVISION_RESIDUAL : { + for (m = 0;m < seq->length[i];m++) { + if (*prsequence != 0.) 
{ + *prsequence = *crsequence / *prsequence; + } + prsequence++; + crsequence++; + } + break; + } + } + break; + } + } + + if (output == SEQUENCE) { + j++; + } + j++; + } + } + } + } + + if (output == SEQUENCE) { + i = offset; + + if (begin_end) { + for (j = offset;j < nb_variable;j++) { + if ((variable == I_DEFAULT) || (variable == j)) { + seq->min_value[i] = min_value[j]; + seq->max_value[i] = max_value[j]; + + if (marginal_distribution[j]) { + seq->marginal_distribution[i] = new FrequencyDistribution(*marginal_distribution[j]); + } + if (marginal_histogram[j]) { + seq->marginal_histogram[i] = new Histogram(*marginal_histogram[j]); + } + i++; + + seq->min_value_computation(i); + seq->max_value_computation(i); + i++; + } + } + } + + else { + for (j = offset;j < nb_variable;j++) { + if ((variable == I_DEFAULT) || (variable == j)) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + i++; + + seq->min_value_computation(i); + seq->max_value_computation(i); + i++; + } + } + } + } + + else { + for (i = offset;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_histogram(i); + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Filtering of sequences using a symmetric smoothing filter. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_point filter half width, + * \param[in] filter filter, + * \param[in] variable variable index, + * \param[in] begin_end begin and end kept or not, + * \param[in] state smoothing by segment or not using the state variable, + * \param[in] output trend, substraction residuals or division residuals. + * + * \return Sequences object. 
+ */
+/*--------------------------------------------------------------*/
+
+Sequences* Sequences::moving_average(StatError &error , int nb_point , vector<double> &filter ,
+                                     int variable , bool begin_end , bool segmentation ,
+                                     sequence_type output) const
+
+{
+  // thin wrapper: forwards the contiguous storage of the vector to the pointer-based overload
+  // NOTE(review): filter.size() is not checked here; the pointer-based overload reads
+  // 2 * nb_point + 1 coefficients, so the caller must provide at least that many
+
+  return moving_average(error , nb_point , filter.data() , variable , begin_end , segmentation , output);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Filtering of sequences using a symmetric smoothing filter.
+ *
+ *  The probability masses of the distribution are used as filter coefficients
+ *  and nb_value / 2 as filter half width. The distribution is rejected if its
+ *  offset is not 0, if its number of values is even, if the absolute value of
+ *  its skewness exceeds SKEWNESS_ROUNDNESS or if its complement is positive.
+ *
+ *  \param[in] error        reference on a StatError object,
+ *  \param[in] dist         symmetric discrete distribution,
+ *  \param[in] variable     variable index,
+ *  \param[in] begin_end    begin and end kept or not,
+ *  \param[in] segmentation smoothing by segment or not using the state variable,
+ *  \param[in] output       trend, subtraction residuals or division residuals.
+ *
+ *  \return                 Sequences object (NULL if a check failed, see error).
+ */
+/*--------------------------------------------------------------*/
+
+Sequences* Sequences::moving_average(StatError &error , const Distribution &dist ,
+                                     int variable , bool begin_end , bool segmentation ,
+                                     sequence_type output) const
+
+{
+  bool status = true;
+  Sequences *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  // an odd number of values starting at 0 is required to define a centred window
+
+  if ((dist.offset != 0) || ((dist.nb_value - dist.offset) % 2 == 0)) {
+    status = false;
+    error.correction_update(STAT_error[STATR_NB_VALUE] , STAT_error[STATR_ODD]);
+  }
+  if (fabs(dist.skewness_computation()) > SKEWNESS_ROUNDNESS) {
+    status = false;
+    error.update(STAT_error[STATR_NON_SYMMETRICAL_DISTRIBUTION]);
+  }
+  if (dist.complement > 0.) {
+    status = false;
+    error.update(STAT_error[STATR_UNPROPER_DISTRIBUTION]);
+  }
+
+  if (status) {
+    seq = moving_average(error , dist.nb_value / 2 , dist.mass , variable ,
+                         begin_end , segmentation , output);
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of pointwise mean, median or mean direction of sequences and
+ *         associated dispersion measures.
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] frequency frequencies for successive index parameter values, + * \param[in] dispersion flag computation of dispersion measures, + * \param[in] output output (sequences, residuals or standardized residuals). + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::pointwise_average_ascii_print(StatError &error , const string path , + int *frequency , bool dispersion , + sequence_type output) const + +{ + bool status; + int i , j , k , m; + int buff , inb_sequence , *width; + double standard_normal_value , half_confidence_interval , *t_value; + ios_base::fmtflags format_flags; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + format_flags = out_file.setf(ios::right , ios::adjustfield); + + if (dispersion) { + +# ifdef MESSAGE + normal normal_dist; + standard_normal_value = quantile(complement(normal_dist , 0.025)); + cout << "\nTEST: " << standard_normal_value; +# endif + + t_value = new double[length[0]]; + for (i = 0;i < length[0];i++) { + if (frequency[i] > 1) { +// t_value[i] = t_value_computation(false , frequency[i] - 1 , 0.05); + students_t students_dist(frequency[i] - 1); + t_value[i] = quantile(complement(students_dist , 0.025)); + } + } + +# ifdef MESSAGE + cout << " | " << t_value[0] << " " << frequency[0] << endl; +# endif + + } + + else { + t_value = NULL; + } + + // computation of the column widths + + inb_sequence = nb_sequence; + if (dispersion) { + inb_sequence--; + } + + width = new int[2 * nb_variable + 2]; + + for (i = 0;i < nb_variable;i++) { + width[i] = column_width(length[0] , real_sequence[0][i]); + if (dispersion) { + buff = column_width(length[nb_sequence - 1] , real_sequence[nb_sequence - 1][i]); + if (buff > width[i]) { + width[i] = buff; + } + } + 
width[i] += ASCII_SPACE; + } + + if (index_parameter) { + width[nb_variable] = column_width(index_parameter_distribution->nb_value - 1); + } + else { + width[nb_variable] = column_width(max_length); + } + + width[nb_variable + 1] = column_width(nb_sequence) + ASCII_SPACE; + + if (nb_sequence < POINTWISE_AVERAGE_NB_SEQUENCE) { + for (i = 0;i < nb_variable;i++) { + width[nb_variable + i + 2] = 0; + for (j = 1;j < inb_sequence;j++) { + buff = column_width(length[j] , real_sequence[j][i]); + if (buff > width[nb_variable + i + 2]) { + width[nb_variable + i + 2] = buff; + } + } + width[nb_variable + i + 2] += ASCII_SPACE; + } + } + + switch (output) { + case SUBTRACTION_RESIDUAL : + out_file << STAT_label[STATL_RESIDUAL] << "\n" << endl; + break; + case STANDARDIZED_RESIDUAL : + out_file << STAT_label[STATL_STANDARDIZED_RESIDUAL] << "\n" << endl; + break; + } + + for (i = 0;i < nb_variable;i++) { + out_file << STAT_label[STATL_VARIABLE] << " " << i + 1 << endl; + + if (index_param_type == TIME) { + out_file << "\n" << SEQ_label[SEQL_TIME]; + } + else { + out_file << "\n" << SEQ_label[SEQL_INDEX]; + } + + out_file << " " << STAT_label[STATL_MEAN]; + if (dispersion) { + out_file << " " << STAT_label[STATL_MEAN_CONFIDENCE_INTERVAL] + << " " << STAT_label[STATL_STANDARD_DEVIATION]; + } + out_file << " " << STAT_label[STATL_FREQUENCY]; + + if (nb_sequence < POINTWISE_AVERAGE_NB_SEQUENCE) { + out_file << " "; + for (j = 1;j < inb_sequence;j++) { + out_file << " " << SEQ_label[SEQL_SEQUENCE] << " " << identifier[j]; + } + } + out_file << endl; + + for (j = 0;j < length[0];j++) { + out_file << setw(width[nb_variable]) << (index_parameter ? 
index_parameter[0][j] : j) << " " + << setw(width[i]) << real_sequence[0][i][j]; + + if (dispersion) { + if (frequency[j] > 1) { +// half_confidence_interval = standard_normal_value * real_sequence[nb_sequence - 1][i][j] / sqrt((double)frequency[j]); + half_confidence_interval = t_value[j] * real_sequence[nb_sequence - 1][i][j] / sqrt((double)frequency[j]); + out_file << setw(width[i]) << real_sequence[0][i][j] - half_confidence_interval + << setw(width[i]) << real_sequence[0][i][j] + half_confidence_interval; + } + + else { + out_file << setw(width[i]) << " " + << setw(width[i]) << " "; + } + + out_file << setw(width[i]) << real_sequence[nb_sequence - 1][i][j]; + } + + out_file << setw(width[nb_variable + 1]) << frequency[j]; + + if (inb_sequence - 1 < POINTWISE_AVERAGE_NB_SEQUENCE) { + out_file << " "; + + if (index_parameter) { + for (k = 1;k < inb_sequence;k++) { + for (m = 0;m < length[k];m++) { + if (index_parameter[k][m] == index_parameter[0][j]) { + out_file << setw(width[nb_variable + i + 2]) << real_sequence[k][i][m]; + break; + } + } + + if (m == length[k]) { + out_file << setw(width[nb_variable + i + 2]) << " "; + } + } + } + + else { + for (k = 1;k < inb_sequence;k++) { + if (j < length[k]) { + out_file << setw(width[nb_variable + i + 2]) << real_sequence[k][i][j]; + } + else { + out_file << setw(width[nb_variable + i + 2]) << " "; + } + } + } + } + + out_file << endl; + } + + out_file << endl; + } + + delete [] width; + delete [] t_value; + + out_file.setf(format_flags , ios::adjustfield); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the pointwise mean, median or mean direction of sequences and + * associated dispersion measures at the spreadsheet format. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] frequency frequencies for successive index parameter values, + * \param[in] dispersion flag computation of dispersion measures, + * \param[in] output output (sequences, residuals or standardized residuals). + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::pointwise_average_spreadsheet_print(StatError &error , const string path , + int *frequency , bool dispersion , + sequence_type output) const + +{ + bool status; + int i , j , k , m; + int inb_sequence; + double standard_normal_value , half_confidence_interval , *t_value; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + if (dispersion) { +// normal normal_dist; +// standard_normal_value = quantile(complement(normal_dist , 0.025)); + + t_value = new double[length[0]]; + for (i = 0;i < length[0];i++) { + if (frequency[i] > 1) { +// t_value[i] = t_value_computation(false , frequency[i] - 1 , 0.05); + students_t students_dist(frequency[i] - 1); + t_value[i] = quantile(complement(students_dist , 0.025)); + } + } + } + + else { + t_value = NULL; + } + + switch (output) { + case SUBTRACTION_RESIDUAL : + out_file << STAT_label[STATL_RESIDUAL] << "\n" << endl; + break; + case STANDARDIZED_RESIDUAL : + out_file << STAT_label[STATL_STANDARDIZED_RESIDUAL] << "\n" << endl; + break; + } + + for (i = 0;i < nb_variable;i++) { + out_file << STAT_label[STATL_VARIABLE] << "\t" << i + 1 << endl; + + if (index_param_type == TIME) { + out_file << "\n" << SEQ_label[SEQL_TIME]; + } + else { + out_file << "\n" << SEQ_label[SEQL_INDEX]; + } + + out_file << "\t" << STAT_label[STATL_MEAN]; + if (dispersion) { + out_file << "\t" << STAT_label[STATL_MEAN_CONFIDENCE_INTERVAL] + << "\t\t" << STAT_label[STATL_STANDARD_DEVIATION]; + } + out_file << 
"\t" << STAT_label[STATL_FREQUENCY]; + + if (nb_sequence < POINTWISE_AVERAGE_NB_SEQUENCE) { + inb_sequence = nb_sequence; + if (dispersion) { + inb_sequence--; + } + + out_file << "\t"; + for (j = 1;j < inb_sequence;j++) { + out_file << "\t" << SEQ_label[SEQL_SEQUENCE] << " " << identifier[j]; + } + } + out_file << endl; + + for (j = 0;j < length[0];j++) { + out_file << (index_parameter ? index_parameter[0][j] : j) + << "\t" << real_sequence[0][i][j]; + + if (dispersion) { + if (frequency[j] > 1) { +// half_confidence_interval = standard_normal_value * real_sequence[nb_sequence - 1][i][j] / sqrt((double)frequency[j]); + half_confidence_interval = t_value[j] * real_sequence[nb_sequence - 1][i][j] / sqrt((double)frequency[j]); + out_file << "\t" << real_sequence[0][i][j] - half_confidence_interval + << "\t" << real_sequence[0][i][j] + half_confidence_interval; + } + + else { + out_file << "\t\t"; + } + + out_file << "\t" << real_sequence[nb_sequence - 1][i][j]; + } + + out_file << "\t" << frequency[j]; + + if (inb_sequence - 1 < POINTWISE_AVERAGE_NB_SEQUENCE) { + out_file << "\t"; + + if (index_parameter) { + for (k = 1;k < inb_sequence;k++) { + out_file << "\t"; + for (m = 0;m < length[k];m++) { + if (index_parameter[k][m] == index_parameter[0][j]) { + out_file << real_sequence[k][i][m]; + break; + } + } + } + } + + else { + for (k = 1;k < inb_sequence;k++) { + out_file << "\t"; + if (j < length[k]) { + out_file << real_sequence[k][i][j]; + } + } + } + } + + out_file << endl; + } + + out_file << endl; + } + + delete [] t_value; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the pointwise mean, median or mean direction of sequences and + * associated dispersion measures. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] circular flag circular variables, + * \param[in] robust flag computation of robust location and dispersion measures, + * \param[in] dispersion flag computation of dispersion measures, + * \param[in] output output (sequences, residuals or standardized residuals), + * \param[in] path file path, + * \param[in] format format (ASCII/SPREADSHEET). + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::pointwise_average(StatError &error , bool circular , bool robust , + bool dispersion , sequence_type output , + const string path , output_format format) const + +{ + bool status = true; + int i , j , k; + int inb_sequence , min_identifier , max_identifier , *iidentifier , *ilength , + *pindex_param , *frequency , *index , *int_sample , *pisample; + variable_nature *itype; + angle_unit unit; + double diff , *prsequence , *plocation , *pdispersion , *real_sample , *prsample , + *pmean_direction1 , *pmean_direction2 , ***mean_direction; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (nb_sequence == 1) { + status = false; + error.update(SEQ_error[SEQR_SINGLE_SEQUENCE]); + } + if ((index_param_type != IMPLICIT_TYPE) && (index_param_type != TIME)) { + status = false; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != STATE) && (type[i] != REAL_VALUE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE] << " or " + << STAT_variable_word[REAL_VALUE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (status) { + if ((output == STANDARDIZED_RESIDUAL) && 
(!dispersion)) { + dispersion = true; + } + + inb_sequence = nb_sequence + (dispersion ? 2 : 1); + + iidentifier = new int[inb_sequence]; + + min_identifier = identifier[0]; + for (i = 0;i < nb_sequence;i++) { + if (identifier[i] < min_identifier) { + min_identifier = identifier[i]; + } + + iidentifier[i + 1] = identifier[i]; + } + + iidentifier[0] = min_identifier - 1; + + if (dispersion) { + max_identifier = identifier[nb_sequence - 1]; + for (i = 0;i < nb_sequence - 1;i++) { + if (identifier[i] > max_identifier) { + max_identifier = identifier[i]; + } + } + + iidentifier[inb_sequence - 1] = max_identifier + 1; + } + + ilength = new int[inb_sequence]; + for (i = 0;i < nb_sequence;i++) { + ilength[i + 1] = length[i]; + } + + if (index_parameter) { + ilength[0] = 0; + for (i = index_parameter_distribution->offset;i < index_parameter_distribution->nb_value;i++) { + if (index_parameter_distribution->frequency[i] > 0) { + ilength[0]++; + } + } + } + + else { + ilength[0] = max_length; + } + + if (dispersion) { + ilength[inb_sequence - 1] = ilength[0]; + } + + itype = new variable_nature[nb_variable]; + for (i = 0;i < nb_variable;i++) { + itype[i] = REAL_VALUE; + } + + seq = new Sequences(inb_sequence , iidentifier , ilength , NULL , + index_param_type , nb_variable , itype); + + delete [] iidentifier; + delete [] ilength; + delete [] itype; + + if (index_parameter) { + pindex_param = seq->index_parameter[0]; + for (i = index_parameter_distribution->offset;i < index_parameter_distribution->nb_value;i++) { + if (index_parameter_distribution->frequency[i] > 0) { + *pindex_param++ = i; + } + } + + if (dispersion) { + for (i = 0;i < seq->length[seq->nb_sequence - 1];i++) { + seq->index_parameter[seq->nb_sequence - 1][i] = seq->index_parameter[0][i]; + } + } + + // copy of index parameters + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < length[i];j++) { + seq->index_parameter[i + 1][j] = index_parameter[i][j]; + } + } + + 
seq->build_index_parameter_frequency_distribution(); + seq->index_interval_computation(); + } + + // computation of frequencies for each index parameter value + + frequency = new int[seq->length[0]]; + + if (index_parameter) { + pindex_param = seq->index_parameter[0]; + i = 0; + for (j = index_parameter_distribution->offset;j < index_parameter_distribution->nb_value;j++) { + if (index_parameter_distribution->frequency[j] > 0) { + frequency[i++] = index_parameter_distribution->frequency[j]; + } + } + } + + else { + frequency[0] = nb_sequence; + for (i = 1;i < max_length;i++) { + frequency[i] = frequency[i - 1] - length_distribution->frequency[i]; + } + } + + if (robust) { + if (index_parameter) { + index = new int[nb_sequence]; + } + else { + index = NULL; + } + int_sample = new int[nb_sequence]; + real_sample = new double[nb_sequence]; + } + + if (circular) { + + // choice of the angle unit + + unit = RADIAN; + for (i = 0;i < nb_variable;i++) { + if (max_value[i] - min_value[i] > 2 * M_PI) { + unit = DEGREE; + break; + } + } + + // computation of mean directions + + mean_direction = new double**[seq->nb_variable]; + for (i = 0;i < seq->nb_variable;i++) { + mean_direction[i] = new double*[3]; + for (j = 0;j < 3;j++) { + mean_direction[i][j] = new double[seq->length[0]]; + } + } + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[0];j++) { + mean_direction[i][0][j] = 0.; + mean_direction[i][1][j] = 0.; + } + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = seq->index_parameter[0]; + pmean_direction1 = mean_direction[j][0]; + pmean_direction2 = mean_direction[j][1]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + pmean_direction1++; + pmean_direction2++; + } + pindex_param++; + *pmean_direction1++ += cos(int_sequence[i][j][k] * M_PI / 180); + *pmean_direction2++ += sin(int_sequence[i][j][k] * 
M_PI / 180); + } + } + + else { + switch (unit) { + + case DEGREE : { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + pmean_direction1++; + pmean_direction2++; + } + pindex_param++; + *pmean_direction1++ += cos(real_sequence[i][j][k] * M_PI / 180); + *pmean_direction2++ += sin(real_sequence[i][j][k] * M_PI / 180); + } + break; + } + + case RADIAN : { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + pmean_direction1++; + pmean_direction2++; + } + pindex_param++; + *pmean_direction1++ += cos(real_sequence[i][j][k]); + *pmean_direction2++ += sin(real_sequence[i][j][k]); + } + break; + } + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + mean_direction[j][0][k] += cos(int_sequence[i][j][k] * M_PI / 180); + mean_direction[j][1][k] += sin(int_sequence[i][j][k] * M_PI / 180); + } + } + + else { + switch (unit) { + + case DEGREE : { + for (k = 0;k < length[i];k++) { + mean_direction[j][0][k] += cos(real_sequence[i][j][k] * M_PI / 180); + mean_direction[j][1][k] += sin(real_sequence[i][j][k] * M_PI / 180); + } + break; + } + + case RADIAN : { + for (k = 0;k < length[i];k++) { + mean_direction[j][0][k] += cos(real_sequence[i][j][k]); + mean_direction[j][1][k] += sin(real_sequence[i][j][k]); + } + break; + } + } + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[0];j++) { + mean_direction[i][0][j] /= frequency[j]; + mean_direction[i][1][j] /= frequency[j]; + + mean_direction[i][2][j] = sqrt(mean_direction[i][0][j] * mean_direction[i][0][j] + + mean_direction[i][1][j] * mean_direction[i][1][j]); + + if (mean_direction[i][2][j] > 0.) { + seq->real_sequence[0][i][j] = atan(mean_direction[i][1][j] / mean_direction[i][0][j]); + + if (mean_direction[i][0][j] < 0.) 
{ + seq->real_sequence[0][i][j] += M_PI; + } + if (unit == DEGREE) { + seq->real_sequence[0][i][j] *= (180 / M_PI); + } + } + + else { + seq->real_sequence[0][i][j] = D_DEFAULT; + } + } + } + + // computation of circular standard deviations + + if (dispersion) { + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[0];j++) { + if (mean_direction[i][2][j] > 0.) { + seq->real_sequence[seq->nb_sequence - 1][i][j] = sqrt(-2 * log(mean_direction[i][2][j])); + if (unit == DEGREE) { + seq->real_sequence[seq->nb_sequence - 1][i][j] *= (180 / M_PI); + } + } + + else { + seq->real_sequence[seq->nb_sequence - 1][i][j] = D_DEFAULT; + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < 3;j++) { + delete [] mean_direction[i][j]; + } + delete [] mean_direction[i]; + } + delete [] mean_direction; + } + + else { + if (robust) { + + // computation of medians + + if (index_parameter) { + for (i = 0;i < nb_variable;i++) { + for (j = 0;j < nb_sequence;j++) { + index[j] = 0; + } + + if (type[i] != REAL_VALUE) { + for (j = 0;j < seq->length[0];j++) { + pisample = int_sample; + for (k = 0;k < nb_sequence;k++) { + while ((index[k] < length[k]) && (index_parameter[k][index[k]] < seq->index_parameter[0][j])) { + index[k]++; + } + if ((index[k] < length[k]) && (index_parameter[k][index[k]] == seq->index_parameter[0][j])) { + *pisample++ = int_sequence[k][i][index[k]]; + } + } + + seq->real_sequence[0][i][j] = quantile_computation(frequency[j] , int_sample , 0.5); + } + } + + else { + for (j = 0;j < seq->length[0];j++) { + prsample = real_sample; + for (k = 0;k < nb_sequence;k++) { + while ((index[k] < length[k]) && (index_parameter[k][index[k]] < seq->index_parameter[0][j])) { + index[k]++; + } + if ((index[k] < length[k]) && (index_parameter[k][index[k]] == seq->index_parameter[0][j])) { + *prsample++ = real_sequence[k][i][index[k]]; + } + } + + seq->real_sequence[0][i][j] = quantile_computation(frequency[j] , real_sample , 0.5); + } + } + } + } + + else 
{ + for (i = 0;i < nb_variable;i++) { + if (type[i] != REAL_VALUE) { + for (j = 0;j < max_length;j++) { + pisample = int_sample; + for (k = 0;k < nb_sequence;k++) { + if (j < length[k]) { + *pisample++ = int_sequence[k][i][j]; + } + } + + seq->real_sequence[0][i][j] = quantile_computation(frequency[j] , int_sample , 0.5); + } + } + + else { + for (j = 0;j < max_length;j++) { + prsample = real_sample; + for (k = 0;k < nb_sequence;k++) { + if (j < length[k]) { + *prsample++ = real_sequence[k][i][j]; + } + } + + seq->real_sequence[0][i][j] = quantile_computation(frequency[j] , real_sample , 0.5); + } + } + } + } + + // computation of mean absolute deviations from the median + + if (dispersion) { + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[seq->nb_sequence - 1];j++) { + seq->real_sequence[seq->nb_sequence - 1][i][j] = 0.; + } + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = seq->index_parameter[seq->nb_sequence - 1]; + prsequence = seq->real_sequence[seq->nb_sequence - 1][j]; + plocation = seq->real_sequence[0][j]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + prsequence++; + plocation++; + } + pindex_param++; + *prsequence++ += fabs(int_sequence[i][j][k] - *plocation++); + } + } + + else { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + prsequence++; + plocation++; + } + pindex_param++; + *prsequence++ += fabs(real_sequence[i][j][k] - *plocation++); + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + seq->real_sequence[seq->nb_sequence - 1][j][k] += fabs(int_sequence[i][j][k] - seq->real_sequence[0][j][k]); + } + } + + else { + for (k = 0;k < length[i];k++) { + seq->real_sequence[seq->nb_sequence - 1][j][k] += 
fabs(real_sequence[i][j][k] - seq->real_sequence[0][j][k]); + } + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[seq->nb_sequence - 1];j++) { + if (frequency[j] > 1) { + seq->real_sequence[seq->nb_sequence - 1][i][j] = seq->real_sequence[seq->nb_sequence - 1][i][j] / + (frequency[j] - 1); + } + } + } + } + } + + else { + + // computation of means + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[0];j++) { + seq->real_sequence[0][i][j] = 0.; + } + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = seq->index_parameter[0]; + prsequence = seq->real_sequence[0][j]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + prsequence++; + } + pindex_param++; + *prsequence++ += int_sequence[i][j][k]; + } + } + + else { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + prsequence++; + } + pindex_param++; + *prsequence++ += real_sequence[i][j][k]; + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + seq->real_sequence[0][j][k] += int_sequence[i][j][k]; + } + } + + else { + for (k = 0;k < length[i];k++) { + seq->real_sequence[0][j][k] += real_sequence[i][j][k]; + } + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[0];j++) { + seq->real_sequence[0][i][j] /= frequency[j]; + } + } + + // computation of standard deviations + + if (dispersion) { + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[seq->nb_sequence - 1];j++) { + seq->real_sequence[seq->nb_sequence - 1][i][j] = 0.; + } + } + + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = seq->index_parameter[seq->nb_sequence - 1]; + 
prsequence = seq->real_sequence[seq->nb_sequence - 1][j]; + plocation = seq->real_sequence[0][j]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + prsequence++; + plocation++; + } + pindex_param++; + diff = int_sequence[i][j][k] - *plocation++; + *prsequence++ += diff * diff; + } + } + + else { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + prsequence++; + plocation++; + } + pindex_param++; + diff = real_sequence[i][j][k] - *plocation++; + *prsequence++ += diff * diff; + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + diff = int_sequence[i][j][k] - seq->real_sequence[0][j][k]; + seq->real_sequence[seq->nb_sequence - 1][j][k] += diff * diff; + } + } + + else { + for (k = 0;k < length[i];k++) { + diff = real_sequence[i][j][k] - seq->real_sequence[0][j][k]; + seq->real_sequence[seq->nb_sequence - 1][j][k] += diff * diff; + } + } + } + } + } + + for (i = 0;i < seq->nb_variable;i++) { + for (j = 0;j < seq->length[seq->nb_sequence - 1];j++) { + if (frequency[j] > 1) { + seq->real_sequence[seq->nb_sequence - 1][i][j] = sqrt(seq->real_sequence[seq->nb_sequence - 1][i][j] / + (frequency[j] - 1)); + } + } + } + } + } + } + + switch (output) { + + // copy of sequences + + case SEQUENCE : { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k]; + } + } + + else { + for (k = 0;k < length[i];k++) { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k]; + } + } + } + } + break; + } + + // computation of residuals + + case SUBTRACTION_RESIDUAL : { + if (circular) { + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = 
seq->index_parameter[0]; + plocation = seq->real_sequence[0][j]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + plocation++; + } + + pindex_param++; + if (fabs(int_sequence[i][j][k] - *plocation) <= 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++; + } + else if (int_sequence[i][j][k] - *plocation > 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++ - 360; + } + else { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++ + 360; + } + } + } + + else { + switch (unit) { + + case DEGREE : { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + plocation++; + } + + pindex_param++; + if (fabs(int_sequence[i][j][k] - *plocation) <= 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++; + } + else if (int_sequence[i][j][k] - *plocation > 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++ - 360; + } + else { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++ + 360; + } + } + break; + } + + case RADIAN : { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + plocation++; + } + + pindex_param++; + if (fabs(real_sequence[i][j][k] - *plocation) <= M_PI) { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - *plocation++; + } + else if (real_sequence[i][j][k] - *plocation > M_PI) { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - *plocation++ - 2 * M_PI; + } + else { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - *plocation++ + 2 * M_PI; + } + } + break; + } + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + if (fabs(int_sequence[i][j][k] - seq->real_sequence[0][j][k]) <= 180) { + 
seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k]; + } + else if (int_sequence[i][j][k] - seq->real_sequence[0][j][k] > 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k] - 360; + } + else { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k] + 360; + } + } + } + + else { + switch (unit) { + + case DEGREE : { + for (k = 0;k < length[i];k++) { + if (fabs(int_sequence[i][j][k] - seq->real_sequence[0][j][k]) <= 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k]; + } + else if (int_sequence[i][j][k] - seq->real_sequence[0][j][k] > 180) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k] - 360; + } + else { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k] + 360; + } + } + break; + } + + case RADIAN : { + for (k = 0;k < length[i];k++) { + if (fabs(real_sequence[i][j][k] - seq->real_sequence[0][j][k]) <= M_PI) { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - seq->real_sequence[0][j][k]; + } + else if (int_sequence[i][j][k] - seq->real_sequence[0][j][k] > M_PI) { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - seq->real_sequence[0][j][k] - 2 * M_PI; + } + else { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - seq->real_sequence[0][j][k] + 2 * M_PI; + } + } + break; + } + } + } + } + } + } + } + + else { + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = seq->index_parameter[0]; + plocation = seq->real_sequence[0][j]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + plocation++; + } + pindex_param++; + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - *plocation++; + } + } + + else { + for (k = 0;k < length[i];k++) { + while (*pindex_param < 
index_parameter[i][k]) { + pindex_param++; + plocation++; + } + pindex_param++; + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - *plocation++; + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + seq->real_sequence[i + 1][j][k] = int_sequence[i][j][k] - seq->real_sequence[0][j][k]; + } + } + + else { + for (k = 0;k < length[i];k++) { + seq->real_sequence[i + 1][j][k] = real_sequence[i][j][k] - seq->real_sequence[0][j][k]; + } + } + } + } + } + } + break; + } + + // computation of standardized residuals + + case STANDARDIZED_RESIDUAL : { + if (index_parameter) { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pindex_param = seq->index_parameter[0]; + plocation = seq->real_sequence[0][j]; + pdispersion = seq->real_sequence[seq->nb_sequence - 1][j]; + + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + plocation++; + pdispersion++; + } + pindex_param++; + if (*pdispersion > 0.) { + seq->real_sequence[i + 1][j][k] = (int_sequence[i][j][k] - *plocation) / + *pdispersion; + } + else { + seq->real_sequence[i + 1][j][k] = 0.; + } + plocation++; + pdispersion++; + } + } + + else { + for (k = 0;k < length[i];k++) { + while (*pindex_param < index_parameter[i][k]) { + pindex_param++; + plocation++; + pdispersion++; + } + pindex_param++; + if (*pdispersion > 0.) { + seq->real_sequence[i + 1][j][k] = (real_sequence[i][j][k] - *plocation) / + *pdispersion; + } + else { + seq->real_sequence[i + 1][j][k] = 0.; + } + plocation++; + pdispersion++; + } + } + } + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + if (type[j] != REAL_VALUE) { + for (k = 0;k < length[i];k++) { + if (seq->real_sequence[seq->nb_sequence - 1][j][k] > 0.) 
{ + seq->real_sequence[i + 1][j][k] = (int_sequence[i][j][k] - seq->real_sequence[0][j][k]) / + seq->real_sequence[seq->nb_sequence - 1][j][k]; + } + else { + seq->real_sequence[i + 1][j][k] = 0.; + } + } + } + + else { + for (k = 0;k < length[i];k++) { + if (seq->real_sequence[seq->nb_sequence - 1][j][k] > 0.) { + seq->real_sequence[i + 1][j][k] = (real_sequence[i][j][k] - seq->real_sequence[0][j][k]) / + seq->real_sequence[seq->nb_sequence - 1][j][k]; + } + else { + seq->real_sequence[i + 1][j][k] = 0.; + } + } + } + } + } + } + break; + } + } + + if ((output == SEQUENCE) && (!dispersion)) { + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value[i] = min_value[i]; + seq->max_value[i] = max_value[i]; + } + } + else { + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + } + } + + for (i = 0;i < seq->nb_variable;i++) { + seq->build_marginal_histogram(i); + } + + // writing of pointwise mean, median or mean direction of sequences and associated dispersion measures + + if (!path.empty()) { + switch (format) { + case ASCII : + status = seq->pointwise_average_ascii_print(error , path , frequency , + dispersion , output); + break; + case SPREADSHEET : + status = seq->pointwise_average_spreadsheet_print(error , path , frequency , + dispersion , output); + break; + } + + if (!status) { + +# ifdef MESSAGE + cout << error; +# endif + + } + } + + delete [] frequency; + + if (robust) { + delete [] index; + delete [] int_sample; + delete [] real_sample; + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the recurrence time sequences for a value taken by + * an integer-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index, + * \param[in] value value. + * + * \return Sequences object. 
+ */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::recurrence_time_sequences(StatError &error , int variable , int value) const + +{ + bool status = true; + int i , j; + int inb_sequence , ilength , previous_index , *psequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if (!marginal_distribution[variable]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + + else if ((value < marginal_distribution[variable]->offset) || + (value >= marginal_distribution[variable]->nb_value) || + (marginal_distribution[variable]->frequency[value] == 0)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_label[STATL_VALUE] << " " << value << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + seq = new Sequences(nb_sequence , 1); + + // computation of the recurrence time sequences + + inb_sequence = 0; + + for (i = 0;i < nb_sequence;i++) { + previous_index = 0; + ilength = 0; + for (j = 0;j < length[i];j++) { + if (int_sequence[i][variable][j] == value) { + if (ilength == 0) { + seq->int_sequence[inb_sequence][0] = new int[length[i]]; + psequence = seq->int_sequence[inb_sequence][0]; + } + + *psequence++ = j - 
previous_index; + previous_index = j; + ilength++; + } + } + + if (ilength > 0) { + seq->length[inb_sequence] = ilength; + seq->identifier[inb_sequence++] = identifier[i]; + } + } + + seq->nb_sequence = inb_sequence; + + seq->max_length_computation(); + seq->cumul_length_computation(); + seq->build_length_frequency_distribution(); + + seq->min_value_computation(0); + seq->max_value_computation(0); + + seq->build_marginal_frequency_distribution(0); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of sojourn time sequences for an integer-valued variable. + * + * \param[in] error reference on a StatError object, + * \param[in] variable variable index. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::sojourn_time_sequences(StatError &error , int variable) const + +{ + bool status = true; + int i , j; + int ilength , begin_run , *pstate , *psequence; + variable_nature itype[2]; +// int run_length; + Sequences *seq; + + + seq = NULL; + error.init(); + + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((type[variable] != INT_VALUE) && (type[variable] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else if (!marginal_distribution[variable]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION]; + error.update((error_message.str()).c_str()); + } + } + + if (status) { + itype[0] = type[variable]; + itype[1] = INT_VALUE; + + seq = new Sequences(nb_sequence , identifier , length 
, NULL , + IMPLICIT_TYPE , 2 , itype); + + // computation of sojourn time sequences + + if ((index_param_type == TIME) && (index_interval->variance > 0.)) { // for the mango growth follow-ups and + for (i = 0;i < nb_sequence;i++) { // the Arabidopsis rosettes + pstate = seq->int_sequence[i][0]; + psequence = seq->int_sequence[i][1]; + begin_run = index_parameter_distribution->offset; +// begin_run = 0; + ilength = 0; + + for (j = 0;j < length[i] - 1;j++) { + if (int_sequence[i][variable][j + 1] != int_sequence[i][variable][j]) { + *pstate++ = int_sequence[i][variable][j]; + *psequence++ = index_parameter[i][j + 1] - begin_run; + begin_run = index_parameter[i][j + 1]; + ilength++; + } + } + + *pstate = int_sequence[i][variable][length[i] - 1]; + *psequence = index_parameter[i][j] + 1 - begin_run; + + seq->length[i] = ilength + 1; + } + } + + else { + for (i = 0;i < nb_sequence;i++) { + pstate = seq->int_sequence[i][0]; + psequence = seq->int_sequence[i][1]; +// run_length = 1; + begin_run = 0; + ilength = 0; + + for (j = 0;j < length[i] - 1;j++) { + if (int_sequence[i][variable][j + 1] != int_sequence[i][variable][j]) { + *pstate++ = int_sequence[i][variable][j]; +// *psequence++ = run_length; +// run_length = 0; + *psequence++ = j + 1 - begin_run; + begin_run = j + 1; + ilength++; + } + +// run_length++; + } + + *pstate = int_sequence[i][variable][length[i] - 1]; +// *psequence = run_length; + *psequence = length[i] - begin_run; + + seq->length[i] = ilength + 1; + } + } + + seq->max_length_computation(); + seq->cumul_length_computation(); + delete seq->length_distribution; + seq->build_length_frequency_distribution(); + + for (i = 0;i < 2;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Discretization of positions. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] step discretization step. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::transform_position(StatError &error , int step) const + +{ + bool status = true; + int i , j , k , m; + int inter_position , nb_unit , *ilength , **pisequence; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (index_param_type != POSITION) { + status = false; + error.correction_update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE] , SEQ_index_parameter_word[POSITION]); + } + + else if ((step < 1) || ((index_interval) && (step > index_interval->mean))) { + status = false; + error.update(SEQ_error[SEQR_POSITION_STEP]); + } + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != INT_VALUE) && (type[i] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << i + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + } + + if (status) { + ilength = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + ilength[i] = index_parameter[i][length[i]] / step + 1 + length[i]; + } + + seq = new Sequences(nb_sequence , identifier , ilength , nb_variable); + delete [] ilength; + + // extraction of sequences + + pisequence = new int*[nb_variable]; + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_variable;j++) { + pisequence[j] = seq->int_sequence[i][j]; + } + seq->length[i] = 0; + + for (j = 0;j <= length[i];j++) { + if (j == 0) { + inter_position = index_parameter[i][j]; + } + else { + inter_position = index_parameter[i][j] - index_parameter[i][j - 1]; + } + + nb_unit = (inter_position % step == 0 ? 
inter_position / step : inter_position / step + 1); + if ((nb_unit > 0) && (j < length[i])) { + nb_unit--; + } + + for (k = 0;k < nb_variable;k++) { + for (m = 0;m < nb_unit;m++) { + *pisequence[k]++ = (int)min_value[k] - 1; + } + if (j < length[i]) { + *pisequence[k]++ = int_sequence[i][k][j]; + } + } + + if (j < length[i]) { + seq->length[i] += nb_unit + 1; + } + else { + seq->length[i] += nb_unit; + } + } + } + + seq->max_length_computation(); + seq->cumul_length_computation(); + delete seq->length_distribution; + seq->build_length_frequency_distribution(); + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value[i] = min_value[i] - 1; + seq->max_value[i] = max_value[i]; + + seq->build_marginal_frequency_distribution(i); + } + + delete [] pisequence; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Crossing of sequences. + * + * \param[in] error reference on a StatError object. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::cross(StatError &error) const + +{ + bool status = true; + int i , j , k , m; + int sense = 0 , *ilength; + Sequences *seq; + + + seq = NULL; + error.init(); + + if (type[0] != INT_VALUE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + for (i = 1;i < nb_sequence;i++) { + if (length[i] > length[i - 1]) { + if (sense == 0) { + sense++; + } + else if (sense == -1) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_error[SEQR_LENGTH]; + error.update((error_message.str()).c_str()); + } + } + + else if (length[i] < length[i - 1]) { + if (sense == 0) { + sense--; + } + else if (sense == 1) { + status = false; + ostringstream 
error_message; + error_message << SEQ_label[SEQL_SEQUENCE] << " " << i + 1 << ": " + << SEQ_error[SEQR_LENGTH]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + ilength = new int[max_length]; + for (i = 0;i < max_length;i++) { + ilength[i] = nb_sequence; + } + + seq = new Sequences(max_length , NULL , ilength , NULL , IMPLICIT_TYPE , + nb_variable , type); + delete [] ilength; + + // construction of crossed sequences + + for (i = 0;i < seq->nb_sequence;i++) { + j = 0; + while (length[j] <= i) { + j++; + } + + k = 0; + do { + for (m = 0;m < seq->nb_variable;m++) { + if (seq->type[m] != REAL_VALUE) { + seq->int_sequence[i][m][k] = int_sequence[j][m][i]; + } + else { + seq->real_sequence[i][m][k] = real_sequence[j][m][i]; + } + } + j++; + k++; + } + while ((j < nb_sequence) && (length[j] > i)); + + seq->length[i] = k; + } + + seq->max_length = nb_sequence; + seq->cumul_length = cumul_length; + delete seq->length_distribution; + seq->build_length_frequency_distribution(); + + for (i = 0;i < seq->nb_variable;i++) { + seq->min_value[i] = min_value[i]; + seq->max_value[i] = max_value[i]; + + if (marginal_distribution[i]) { + seq->marginal_distribution[i] = new FrequencyDistribution(*marginal_distribution[i]); + } + if (marginal_histogram[i]) { + seq->marginal_histogram[i] = new Histogram(*marginal_histogram[i]); + } + } + } + + return seq; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/sequences3.cpp b/src/cpp/sequence_analysis/sequences3.cpp new file mode 100644 index 0000000..bf34a8a --- /dev/null +++ b/src/cpp/sequence_analysis/sequences3.cpp @@ -0,0 +1,4175 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id: sequences3.cpp 11060 2011-09-02 16:28:11Z 
guedon $ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "stat_tool/quantile_computation.hpp" + +#include "sequences.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a Sequences objects from arrays of index_parameters, + * discrete values, real values, MTG vertex and sequence identifiers. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] iindex_param_type index parameter type (TIME/POSITION), + * \param[in] iindex_parameter index parameters, + * \param[in] iint_vector integer-valued sequences, + * \param[in] ireal_vector real-valued sequences, + * \param[in] iidentifier sequence identifiers, + * \param[in] ivertex_identifier vertex identifiers of the associated MTG. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::build(StatError &error , index_parameter_type iindex_param_type , + const vector > &iindex_parameter , + const vector > > &iint_sequence , + const vector > > &ireal_sequence , + const vector &iidentifier , const vector > &ivertex_identifier) + +{ + bool status = true; + int i , j; + int inb_sequence , nb_int_variable , nb_real_variable , *ilength; + Sequences *seq; + + + seq = NULL; + inb_sequence = I_DEFAULT; + error.init(); + + if (!iint_sequence.empty()) { + inb_sequence = iint_sequence.size(); + } + else if (!ireal_sequence.empty()) { + inb_sequence = ireal_sequence.size(); + } + else { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + if ((!iint_sequence.empty()) && (!ireal_sequence.empty()) && (iint_sequence.size() != ireal_sequence.size())) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if ((!iindex_parameter.empty()) && (iindex_parameter.size() != inb_sequence)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if ((!iidentifier.empty()) && (iidentifier.size() != inb_sequence)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if ((!ivertex_identifier.empty()) && (ivertex_identifier.size() != inb_sequence)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + if (status) { + ilength = new int [inb_sequence]; + + for (i = 0;i < inb_sequence;i++) { + if (!iint_sequence.empty()) { + ilength[i] = iint_sequence[i].size(); + } + else if 
(!ireal_sequence.empty()) { + ilength[i] = ireal_sequence[i].size(); + } + if ((!iint_sequence.empty()) && (!ireal_sequence.empty()) && (iint_sequence[i].size() != ireal_sequence[i].size())) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_LENGTH] , i); + } + + if (!iindex_parameter.empty()) { + switch (iindex_param_type) { + + case TIME : { + if (iindex_parameter[i].size() != ilength[i]) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_LENGTH] , i); + } + break; + } + + case POSITION : { + if (iindex_parameter[i].size() != ilength[i] + 1) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_LENGTH] , i); + } + break; + } + } + } + + if ((!ivertex_identifier.empty()) && (ivertex_identifier[i].size() != ilength[i])) { + status = false; + error.update(SEQ_error[SEQR_SEQUENCE_LENGTH] , i); + } + } + + if (!iint_sequence.empty()) { + nb_int_variable = iint_sequence[0][0].size(); + for (i = 0;i < inb_sequence;i++) { + for (j = 0;j < iint_sequence[i].size();j++) { + if (iint_sequence[i][j].size() != nb_int_variable) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE] , i , j); + } + } + } + } + else { + nb_int_variable = 0; + } + + if (!ireal_sequence.empty()) { + nb_real_variable = ireal_sequence[0][0].size(); + for (i = 0;i < inb_sequence;i++) { + for (j = 0;j < ireal_sequence[i].size();j++) { + if (ireal_sequence[i][j].size() != nb_real_variable) { + status = false; + error.update(STAT_error[STATR_NB_VARIABLE] , i , j); + } + } + } + } + else { + nb_real_variable = 0; + } + + if (status) { + seq = new Sequences(inb_sequence , iidentifier , ilength , ivertex_identifier , iindex_param_type , + iindex_parameter , nb_int_variable , nb_real_variable , iint_sequence , ireal_sequence); + } + + delete [] ilength; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a Sequences object from a file. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] old_format flag format. + * + * \return Sequences object. + */ +/*--------------------------------------------------------------*/ + +Sequences* Sequences::ascii_read(StatError &error , const string path , bool old_format) + +{ + string buffer , trimmed_buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + bool status , lstatus; + int i , j , k , m; + int line , read_line , offset , initial_nb_line , max_length , nb_variable = 0 , + vector_size , nb_sequence , index , int_value , line_continue , *length; + variable_nature *type; + index_parameter_type index_param_type = IMPLICIT_TYPE; + double real_value; + Sequences *seq; + ifstream in_file(path.c_str()); + + + seq = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + type = NULL; + length = NULL; + + // 1st pass: analysis of the optional line defining the index parameter and + // of the mandatory line defining the number of variables + + read_line = 0; + + while (getline(in_file , buffer)) { + line++; + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { + + case 0 : { + + // test INDEX_PARAMETER keyword + + if ((!old_format) && (read_line == 0) && (*token == SEQ_word[SEQW_INDEX_PARAMETER])) { + index_param_type = TIME; + } + + // test number of variables + + else { + lstatus = true; + +/* try { + int_value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + int_value = atoi(token->c_str()); + + if (lstatus) { + if ((int_value < 1) || (int_value > SEQUENCE_NB_VARIABLE)) { + lstatus = false; + } + else { + nb_variable = int_value; + } + } + + if 
(!lstatus) { + status = false; + error.update(STAT_parsing[STATP_NB_VARIABLE] , line , i + 1); + } + } + break; + } + + case 1 : { + + // test separator + + if ((!old_format) && (read_line == 0) && (index_param_type != IMPLICIT_TYPE)) { + if (*token != ":") { + status = false; + error.update(STAT_parsing[STATP_SEPARATOR] , line , i + 1); + } + } + + // test VARIABLE(S) keyword + + else if (*token != STAT_word[nb_variable == 1 ? STATW_VARIABLE : STATW_VARIABLES]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , + STAT_word[nb_variable == 1 ? STATW_VARIABLE : STATW_VARIABLES] , line , i + 1); + } + break; + } + + // test keyword defining the index parameter type + + case 2 : { + if ((!old_format) && (read_line == 0) && (index_param_type != IMPLICIT_TYPE)) { + for (j = TIME;j <= POSITION_INTERVAL;j++) { + if (*token == SEQ_index_parameter_word[j]) { + index_param_type = (index_parameter_type)j; + break; + } + } + + if (j == POSITION_INTERVAL + 1) { + status = false; + error.update(STAT_parsing[STATP_KEYWORD] , line , i + 1); + } + } + break; + } + } + + i++; + } + + if (i > 0) { + if (((!old_format) && (read_line == 0) && (index_param_type != IMPLICIT_TYPE) && (i != 3)) || + (((old_format) || (read_line == 1) || (index_param_type == IMPLICIT_TYPE)) && (i != 2))) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + read_line++; +// if (((!old_format) && (index_param_type != IMPLICIT_TYPE) && (read_line == 2)) || +// (((old_format) || (index_param_type == IMPLICIT_TYPE)) && (read_line == 1)) { + if ((((old_format) || (index_param_type == IMPLICIT_TYPE)) && (read_line == 1)) || + (read_line == 2)) { + break; + } + } + } + + if (read_line < 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT]); + } + + // analysis of the lines defining the variable types + + if (status) { + type = new variable_nature[nb_variable]; + for (i = 0;i < nb_variable;i++) { + type[i] = AUXILIARY; + } + + read_line = 0; + offset = 
(old_format ? 1 : 0); + + while (getline(in_file , buffer)) { + line++; + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { + + // test VARIABLE keyword + + case 0 : { + if (*token != STAT_word[STATW_VARIABLE]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_VARIABLE] , line , i + 1); + } + break; + } + + // test variable index + + case 1 : { + lstatus = true; + +/* try { + int_value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + int_value = atoi(token->c_str()); + + if ((lstatus) && (int_value != read_line + 1)) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.correction_update(STAT_parsing[STATP_VARIABLE_INDEX] , read_line + 1 , line , i + 1); + } + break; + } + + // test separator + + case 2 : { + if (*token != ":") { + status = false; + error.update(STAT_parsing[STATP_SEPARATOR] , line , i + 1); + } + break; + } + + // test keyword defining the variable type + + case 3 : { + if ((old_format) && (read_line == 0)) { + for (j = TIME;j <= POSITION_INTERVAL;j++) { + if (*token == SEQ_index_parameter_word[j]) { + index_param_type = (index_parameter_type)j; + break; + } + } + + if (j == POSITION_INTERVAL + 1) { +// for (j = INT_VALUE;j <= STATE;j++) { + for (j = INT_VALUE;j <= OLD_INT_VALUE;j++) { + if (*token == STAT_variable_word[j]) { +// if (j == STATE) { + if ((j == STATE) || (j == OLD_INT_VALUE)) { + j = INT_VALUE; + } + type[read_line] = (variable_nature)j; + break; + } + } + +// if (j == STATE + 1) { + if (j == OLD_INT_VALUE + 1) { + status = false; + error.update(STAT_parsing[STATP_KEYWORD] , line , i + 1); + } + } + } + + else { +// for (j = INT_VALUE;j <= NB_INTERNODE;j++) { + for (j = INT_VALUE;j <= OLD_INT_VALUE;j++) { + if (*token == 
STAT_variable_word[j]) { +// if ((j == NB_INTERNODE) && ((read_line != offset) || ((read_line == offset) && + if ((j == OLD_INT_VALUE) && ((read_line != offset) || ((read_line == offset) && + (index_param_type != POSITION) && (index_param_type != POSITION_INTERVAL)))) { + status = false; + error.update(STAT_parsing[STATP_VARIABLE_TYPE] , line , i + 1); + } + + else { +// if (j == STATE) { + if ((j == STATE) || (j == OLD_INT_VALUE)) { + j = INT_VALUE; + } + + if ((old_format) && (index_param_type != IMPLICIT_TYPE)) { + type[read_line - 1] = (variable_nature)j; + } + else { + type[read_line] = (variable_nature)j; + } + } + break; + } + } + } + +// if (j == NB_INTERNODE + 1) { + if (j == OLD_INT_VALUE + 1) { + status = false; + error.update(STAT_parsing[STATP_KEYWORD] , line , i + 1); + } + break; + } + } + + i++; + } + + if (i > 0) { + if (i != 4) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + read_line++; + if (read_line == nb_variable) { + break; + } + } + } + + if (read_line < nb_variable) { + status = false; + error.update(STAT_parsing[STATP_FORMAT]); + } + + else { +// if ((((index_param_type == TIME) || (index_param_type == TIME_INTERVAL)) && (read_line < offset + 1)) || +// (((index_param_type == POSITION) || (index_param_type == POSITION_INTERVAL)) && +// ((read_line < offset + 1) || ((read_line > offset + 1) && (type[0] == NB_INTERNODE))))) { + if (((index_param_type == TIME) || (index_param_type == TIME_INTERVAL) || + (index_param_type == POSITION) || (index_param_type == POSITION_INTERVAL)) && + (read_line < offset + 1)) { + status = false; + error.update(STAT_parsing[STATP_VARIABLE_TYPE]); + } + } + + initial_nb_line = line; + } + + if (status) { + vector_size = nb_variable; + + if (index_param_type != IMPLICIT_TYPE) { + if (old_format) { + nb_variable--; + } + else { + vector_size++; + } + } + + nb_sequence = 0; + lstatus = true; + +// while (buffer.readLine(in_file , true)) { + while (getline(in_file , buffer)) { + 
position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + + if (!(buffer.empty())) { + trimmed_buffer = trim_right_copy_if(buffer , is_any_of(" \t")); + + if ((!(trimmed_buffer.empty())) && (trimmed_buffer.find('\\' , trimmed_buffer.length() - 1) == string::npos)) { + nb_sequence++; + lstatus = true; + } + else { + lstatus = false; + } + } + } + + if ((nb_sequence == 0) || (!lstatus)) { + status = false; + error.update(STAT_parsing[STATP_FORMAT]); + } + +# ifdef DEBUG + cout << "\nnumber of sequences : " << nb_sequence << endl; +# endif + } + + // 2nd pass: analysis of the sequence format + + if (status) { +// in_file.close(); +// in_file.open(path.c_str() , ios::in); + + in_file.clear(); + in_file.seekg(0 , ios::beg); + + offset = (index_param_type == IMPLICIT_TYPE ? 0 : 1); + + length = new int[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + if (vector_size == 1) { + length[i] = 0; + } + else { + length[i] = 1; + } + } + + line = 0; + + do { + getline(in_file , buffer); + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + } + while (line < initial_nb_line); + + max_length = 0; + + switch (index_param_type) { + case TIME : + index = -1; + break; + case POSITION : + index = 0; + break; + } + + i = 0; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + + j = 0; + k = 0; + line_continue = false; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + if (line_continue) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line , j); + break; + } + + if ((vector_size > 1) && (j % (vector_size + 1) == vector_size)) { + if (*token == "\\") { + line_continue = true; + length[i]++; + } + + else { + if (*token != "|") { + status = false; + 
error.update(STAT_parsing[STATP_SEPARATOR] , line , j + 1); + } + else { + k = 0; + length[i]++; + } + } + } + + else { + if ((vector_size == 1) && (*token == "\\")) { + line_continue = true; + } + + else { + lstatus = true; + + if (((index_param_type != IMPLICIT_TYPE) && (k == 0)) || (type[k - offset] != REAL_VALUE)) { +/* try { + int_value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + int_value = atoi(token->c_str()); + + if ((lstatus) && (((k == 0) && (((index_param_type == TIME) || (index_param_type == POSITION) || + (index_param_type == POSITION_INTERVAL)) && (int_value < 0)) || + ((index_param_type == TIME_INTERVAL) && (int_value <= 0))))) { +// ((k == 1) && (type[k - 1] == NB_INTERNODE) && (int_value < 0)))) { + lstatus = false; + } + } + + else { +/* try { + real_value = stod(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + real_value = atof(token->c_str()); + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_DATA_TYPE] , line , j + 1); + } + + else if (k == 0) { + switch (index_param_type) { + + case TIME : { + if (int_value <= index) { + status = false; + error.update(SEQ_parsing[SEQP_TIME_INDEX_ORDER] , line , j + 1); + } + else { + index = int_value; + } + break; + } + + case POSITION : { + if (int_value < index) { + status = false; + error.update(SEQ_parsing[SEQP_POSITION_ORDER] , line , j + 1); + } + else { + index = int_value; + } + break; + } + } + } + + if (vector_size == 1) { + length[i]++; + } + else { + k++; + } + } + } + + j++; + } + + if (j > 0) { + if (vector_size > 1) { + if (((line_continue) || ((index_param_type != POSITION) && (index_param_type != POSITION_INTERVAL))) && + (k != vector_size)) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line , j); + } + } + + if (!line_continue) { + if ((index_param_type == POSITION) || (index_param_type == POSITION_INTERVAL)) { + if (k != 1) { + status = false; + 
error.update(STAT_parsing[STATP_FORMAT] , line , j); + } + + length[i]--; + if (length[i] == 0) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line , j); + } + } + + switch (index_param_type) { + case TIME : + index = -1; + break; + case POSITION : + index = 0; + break; + } + + if (length[i] > max_length) { + max_length = length[i]; + } + + if (i < nb_sequence - 1) { + i++; + } + } + } + } + + if (max_length <= 1) { + status = false; + error.update(SEQ_parsing[SEQP_MAX_SEQUENCE_LENGTH]); + } + +# ifdef DEBUG + for (i = 0;i < nb_sequence;i++) { + cout << i << " " << length[i] << " | "; + } + cout << endl; +# endif + } + + // 3rd pass: sequence copy + + if (status) { +// in_file.close(); +// in_file.open(path.c_str() , ios::in); + + in_file.clear(); + in_file.seekg(0 , ios::beg); + + seq = new Sequences(nb_sequence , NULL , length , NULL , + index_param_type , nb_variable , type); + + line = 0; + + do { + getline(in_file , buffer); + line++; + } + while (line < initial_nb_line); + + i = 0; + j = 0; + + while (getline(in_file , buffer)) { + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + + k = 0; + m = 0; + line_continue = false; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + if ((vector_size > 1) && (m % (vector_size + 1) == vector_size)) { + if (*token == "\\") { + line_continue = true; + } + k = 0; + j++; + } + + else { + if ((vector_size == 1) && (*token == "\\")) { + line_continue = true; + } + + else { + if ((index_param_type != IMPLICIT_TYPE) && (k == 0)) { +// seq->index_parameter[i][j] = stoi(*token); in C++ 11 + seq->index_parameter[i][j] = atoi(token->c_str()); + } + else if (type[k - offset] != REAL_VALUE) { +// seq->int_sequence[i][k - offset][j] = stoi(*token); in C++ 11 + seq->int_sequence[i][k - offset][j] = atoi(token->c_str()); + } + else { +// seq->real_sequence[i][k - offset][j] = 
stod(*token); in C++ 11 + seq->real_sequence[i][k - offset][j] = atof(token->c_str()); + } + + if (vector_size == 1) { + j++; + } + else { + k++; + } + } + } + + m++; + } + + if ((m > 0) && (!line_continue)) { + i++; + j = 0; + } + } + + if ((seq->index_param_type == TIME_INTERVAL) || (seq->index_param_type == POSITION_INTERVAL)) { + seq->index_parameter_computation(); + } + + if (seq->index_parameter) { + seq->build_index_parameter_frequency_distribution(); + } +// if ((seq->index_param_type == TIME) || ((seq->index_param_type == POSITION) && +// (seq->type[0] != NB_INTERNODE))) { + if ((seq->index_param_type == TIME) || (seq->index_param_type == POSITION)) { + seq->index_interval_computation(); + } + + for (i = 0;i < nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + } + } + + delete [] type; + delete [] length; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a Sequences object. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::line_write(ostream &os) const + +{ + os << nb_sequence << " " << SEQ_label[nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << " " + << nb_variable << " " << STAT_word[nb_variable == 1 ? STATW_VARIABLE : STATW_VARIABLES] << " " + << SEQ_label[nb_sequence == 1 ? SEQL_LENGTH : SEQL_CUMUL_LENGTH] << ": " << cumul_length; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level, + * \param[in] comment_flag flag comment. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::ascii_write(ostream &os , bool exhaustive , bool comment_flag) const + +{ + int i , j , k; + int *int_value , *pint_value; + double mean , variance , median , lower_quartile , upper_quartile , *real_value , *preal_value; + + + if (index_parameter) { + os << SEQ_word[SEQW_INDEX_PARAMETER] << " : " + << SEQ_index_parameter_word[index_param_type]; + } + + if (index_parameter_distribution) { + os << " "; + if (comment_flag) { + os << "# "; + } + os << "(" << SEQ_label[SEQL_MIN_INDEX_PARAMETER] << ": " << index_parameter_distribution->offset << ", " + << SEQ_label[SEQL_MAX_INDEX_PARAMETER] << ": " << index_parameter_distribution->nb_value - 1 << ")" << endl; + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << (index_param_type == TIME ? SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + index_parameter_distribution->ascii_characteristic_print(os , false , comment_flag); + + if (exhaustive) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << (index_param_type == TIME ? SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + index_parameter_distribution->ascii_print(os , comment_flag); + } + } + + else { + os << endl; + } + + if (index_interval) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << (index_param_type == TIME ? SEQ_label[SEQL_TIME_INTERVAL] : SEQ_label[SEQL_POSITION_INTERVAL]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + index_interval->ascii_characteristic_print(os , false , comment_flag); + + if (exhaustive) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << (index_param_type == TIME ? 
SEQ_label[SEQL_TIME_INTERVAL] : SEQ_label[SEQL_POSITION_INTERVAL]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + index_interval->ascii_print(os , comment_flag); + } + } + + if (index_parameter) { + os << "\n"; + } + os << nb_variable << " " << STAT_word[nb_variable == 1 ? STATW_VARIABLE : STATW_VARIABLES] << endl; + + for (i = 0;i < nb_variable;i++) { + os << "\n" << STAT_word[STATW_VARIABLE] << " " << i + 1 << " : " + << STAT_variable_word[type[i]]; + + if (type[i] != AUXILIARY) { + os << " "; + if (comment_flag) { + os << "# "; + } + os << "(" << STAT_label[STATL_MIN_VALUE] << ": " << min_value[i] << ", " + << STAT_label[STATL_MAX_VALUE] << ": " << max_value[i] << ")" << endl; + + if (marginal_distribution[i]) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + + marginal_distribution[i]->ascii_characteristic_print(os , exhaustive , comment_flag); + + if ((marginal_distribution[i]->nb_value <= ASCII_NB_VALUE) || (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << STAT_label[STATL_FREQUENCY] << endl; + marginal_distribution[i]->ascii_print(os , comment_flag); + } + } + + else { + mean = mean_computation(i); + variance = variance_computation(i , mean); + + if (variance > 0.) 
{ + switch (type[i]) { + + case INT_VALUE : { + int_value = new int[cumul_length]; + pint_value = int_value; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + *pint_value++ = int_sequence[j][i][k]; + } + } + + lower_quartile = quantile_computation(cumul_length , int_value , 0.25); + median = quantile_computation(cumul_length , int_value , 0.5); + upper_quartile = quantile_computation(cumul_length , int_value , 0.75); + + delete [] int_value; + break; + } + + case REAL_VALUE : { + real_value = new double[cumul_length]; + preal_value = real_value; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + *preal_value++ = real_sequence[j][i][k]; + } + } + + lower_quartile = quantile_computation(cumul_length , real_value , 0.25); + median = quantile_computation(cumul_length , real_value , 0.5); + upper_quartile = quantile_computation(cumul_length , real_value , 0.75); + + delete [] real_value; + break; + } + } + } + + else { + median = mean; + } + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_SAMPLE_SIZE] << ": " << cumul_length << endl; + + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_MEAN] << ": " << mean << " " + << STAT_label[STATL_MEDIAN] << ": " << median << endl; + + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIANCE] << ": " << variance << " " + << STAT_label[STATL_STANDARD_DEVIATION] << ": " << sqrt(variance); + if (variance > 0.) { + os << " " << STAT_label[STATL_LOWER_QUARTILE] << ": " << lower_quartile + << " " << STAT_label[STATL_UPPER_QUARTILE] << ": " << upper_quartile; + } + os << endl; + + if ((variance > 0.) 
&& (exhaustive)) { + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_SKEWNESS_COEFF] << ": " << skewness_computation(i , mean , variance) << " " + << STAT_label[STATL_KURTOSIS_COEFF] << ": " << kurtosis_computation(i , mean , variance) << endl; + } + + if ((marginal_histogram[i]) && (exhaustive)) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM] << endl; + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_VALUE] << " | " << STAT_label[STATL_FREQUENCY] << endl; + marginal_histogram[i]->ascii_print(os , comment_flag); + } + } + } + + else { + +# ifdef MESSAGE + mean = mean_computation(i); + variance = variance_computation(i , mean); + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << STAT_label[STATL_MEAN] << ": " << mean << " " + << STAT_label[STATL_VARIANCE] << ": " << variance << " " + << STAT_label[STATL_STANDARD_DEVIATION] << ": " << sqrt(variance) << endl; +# endif + +// os << endl; + } + } + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + length_distribution->ascii_characteristic_print(os , false , comment_flag); + + if (exhaustive) { + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + length_distribution->ascii_print(os , comment_flag); + } + + os << "\n"; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_CUMUL_LENGTH] << ": " << cumul_length << endl; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::ascii_write(ostream &os , bool exhaustive) const + +{ + return ascii_write(os , exhaustive , false); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + ascii_write(out_file , exhaustive , false); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of sequences. + * + * \param[in,out] os stream, + * \param[in] format format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY), + * \param[in] comment_flag flag comment, + * \param[in] posterior_probability posterior probabilities of the most probable state sequences, + * \param[in] entropy entropies of state sequences, + * \param[in] nb_state_sequence numbers of state sequences (hidden Markovian models), + * \param[in] posterior_state_probability posterior probabilities of the most probable initial state, + * \param[in] line_nb_character number of characters per line. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::ascii_print(ostream &os , output_sequence_format format , bool comment_flag , + double *posterior_probability , double *entropy , + double *nb_state_sequence , double *posterior_state_probability , + int line_nb_character) const + +{ + int i , j , k , m; + + + switch (format) { + + case COLUMN : { + for (i = 0;i < nb_sequence;i++) { + os << "\n"; + +# ifdef DEBUG + for (j = 0;j < length[i];j++) { + if (index_parameter) { + os << index_parameter[i][j] << " "; + } + for (k = 0;k < nb_variable;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + os << int_sequence[i][k][j] << " "; + } + else { + os << real_sequence[i][k][j] << " "; + } + } + + if (j < length[i] - 1) { + if ((os.rdbuf())->in_avail() > line_nb_character) { + os << "\\" << endl; + } + + else { + if ((index_parameter) || (nb_variable > 1)) { + os << "| "; + } + } + } + } + +# else + ostringstream sos; + + for (j = 0;j < length[i];j++) { + if (index_parameter) { + sos << index_parameter[i][j] << " "; + } + for (k = 0;k < nb_variable;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + sos << int_sequence[i][k][j] << " "; + } + else { + sos << real_sequence[i][k][j] << " "; + } + } + + if (j < length[i] - 1) { + if (sos.str().size() > line_nb_character) { + os << sos.str() << "\\" << endl; + sos.str(""); + } + + else { + if ((index_parameter) || (nb_variable > 1)) { + sos << "| "; + } + } + } + } + + os << sos.str(); +# endif + + if (index_param_type == POSITION) { + os << "| " << index_parameter[i][length[i]]; + } + + os << " "; + if (comment_flag) { + os << "# "; + } + os << "(" << identifier[i] << ")" << endl; + + if ((posterior_probability) && (entropy) && (nb_state_sequence)) { + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] + << ": " << posterior_probability[i] << " " + << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << 
entropy[i] << " " + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << ": " << nb_state_sequence[i] << endl; + if (comment_flag) { + os << "# "; + } +// os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY_LOG_RATIO] +// << ": " << log(nb_state_sequence[i]) + log(posterior_probability[i]) << " " + os << SEQ_label[SEQL_STATE_SEQUENCE_DIVERGENCE] << ": " + << log(nb_state_sequence[i]) - entropy[i] << endl; + } + } + break; + } + + case VECTOR : { + for (i = 0;i < nb_sequence;i++) { + os << "\n"; + + for (j = 0;j < length[i];j++) { + if (index_parameter) { + os << index_parameter[i][j] << " "; + } + for (k = 0;k < nb_variable;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + os << int_sequence[i][k][j] << " "; + } + else { + os << real_sequence[i][k][j] << " "; + } + } + + if (j < length[i] - 1) { + os << "\\" << endl; + } + } + + if (index_param_type == POSITION) { + os << "| " << index_parameter[i][length[i]]; + } + + os << " "; + if (comment_flag) { + os << "# "; + } + os << "(" << identifier[i] << ")" << endl; + + if ((posterior_probability) && (entropy) && (nb_state_sequence)) { + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] + << ": " << posterior_probability[i] << " " + << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << entropy[i] << " " + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << ": " << nb_state_sequence[i] << endl; + if (comment_flag) { + os << "# "; + } +// os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY_LOG_RATIO] +// << ": " << log(nb_state_sequence[i]) + log(posterior_probability[i]) << " " + os << SEQ_label[SEQL_STATE_SEQUENCE_DIVERGENCE] << ": " + << log(nb_state_sequence[i]) - entropy[i] << endl; + } + } + break; + } + + case LINE : { + int buff , start , width; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + if (index_parameter) { + width = column_width(index_parameter_distribution->nb_value - 1); + } + else { + width = 0; + 
} + + for (i = 0;i < nb_variable;i++) { + if ((type[i] != REAL_VALUE) && (type[i] != AUXILIARY)) { + buff = column_width((int)min_value[i] , (int)max_value[i]); + if (buff > width) { + width = buff; + } + } + + else { + for (j = 0;j < nb_sequence;j++) { + buff = column_width(length[j] , real_sequence[j][i]); + if (buff > width) { + width = buff; + } + } + } + } + + for (i = 0;i < nb_sequence;i++) { + os << "\n"; + start = 0; + + for (j = 0;j < length[i];j++) { + os << setw(j == start ? width : width + 1); + if (index_parameter) { + os << index_parameter[i][j]; + } + else if (type[0] != REAL_VALUE) { + os << int_sequence[i][0][j]; + } + else { + os << real_sequence[i][0][j]; + } + + if (j < length[i] - 1) { + if ((j - start) * (width + 1) > 10000) { +// if ((j - start) * (width + 1) > line_nb_character) { + os << " \\" << endl; + + for (k = (index_parameter ? 0 : 1);k < nb_variable;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + os << setw(width) << int_sequence[i][k][start]; + for (m = start + 1;m <= j;m++) { + os << setw(width + 1) << int_sequence[i][k][m]; + } + } + + else { + os << setw(width) << real_sequence[i][k][start]; + for (m = start + 1;m <= j;m++) { + os << setw(width + 1) << real_sequence[i][k][m]; + } + } + + os << " \\" << endl; + } + start = j + 1; + } + } + + else { + if (index_param_type == POSITION) { + os << setw(width + 1) << index_parameter[i][length[i]]; + } + + for (k = (index_parameter ? 
0 : 1);k < nb_variable;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + os << endl; + os << setw(width) << int_sequence[i][k][start]; + for (m = start + 1;m <= j;m++) { + os << setw(width + 1) << int_sequence[i][k][m]; + } + } + + else { + os << endl; + os << setw(width) << real_sequence[i][k][start]; + for (m = start + 1;m <= j;m++) { + os << setw(width + 1) << real_sequence[i][k][m]; + } + } + } + } + } + + os << " "; + if (comment_flag) { + os << "# "; + } + os << "(" << identifier[i] << ")" << endl; + + if ((posterior_probability) && (entropy) && (nb_state_sequence)) { + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] + << ": " << posterior_probability[i] << " " + << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << entropy[i] << " " + << SEQ_label[SEQL_NB_STATE_SEQUENCE] << ": " << nb_state_sequence[i] << endl; + if (comment_flag) { + os << "# "; + } +// os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY_LOG_RATIO] +// << ": " << log(nb_state_sequence[i]) + log(posterior_probability[i]) << " " + os << SEQ_label[SEQL_STATE_SEQUENCE_DIVERGENCE] << ": " + << log(nb_state_sequence[i]) - entropy[i] << endl; + } + } + + os.setf(format_flags , ios::adjustfield); + break; + } + + case ARRAY : { + os << "["; + for (i = 0;i < nb_sequence;i++) { + +# ifdef DEBUG + os << "["; + for (j = 0;j < length[i];j++) { + if ((!index_parameter) && (nb_variable == 1)) { + if (type[0] != REAL_VALUE) { + os << int_sequence[i][0][j]; + } + else { + os << real_sequence[i][0][j]; + } + } + + else { + os << "["; + if (index_parameter) { + os << index_parameter[i][j] << ","; + } + + for (k = 0;k < nb_variable - 1;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + os << int_sequence[i][k][j] << ","; + } + else { + os << real_sequence[i][k][j] << ","; + } + } + + if ((type[nb_variable - 1] != REAL_VALUE) && (type[nb_variable - 1] != AUXILIARY)) { + os << int_sequence[i][nb_variable - 1][j] << "]"; + 
} + else { + os << real_sequence[i][nb_variable - 1][j] << "]"; + } + } + + if (j < length[i] - 1) { + os << ","; + if ((os.rdbuf())->in_avail() > line_nb_character) { + os << "\\" << endl; + os << " "; + } + } + } + +# else + ostringstream sos; + + sos << "["; + for (j = 0;j < length[i];j++) { + if ((!index_parameter) && (nb_variable == 1)) { + if (type[0] == REAL_VALUE) { + sos << int_sequence[i][0][j]; + } + else { + sos << real_sequence[i][0][j]; + } + } + + else { + sos << "["; + if (index_parameter) { + sos << index_parameter[i][j] << ","; + } + + for (k = 0;k < nb_variable - 1;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + sos << int_sequence[i][k][j] << ","; + } + else { + sos << real_sequence[i][k][j] << ","; + } + } + + if ((type[nb_variable - 1] != REAL_VALUE) && (type[nb_variable - 1] != AUXILIARY)) { + sos << int_sequence[i][nb_variable - 1][j] << "]"; + } + else { + sos << real_sequence[i][nb_variable - 1][j] << "]"; + } + } + + if (j < length[i] - 1) { + sos << ","; + if (sos.str().size() > line_nb_character) { + os << sos.str() << "\\" << endl; + os << " "; + sos.str(""); + } + } + } + + os << sos.str(); +# endif + + if (index_param_type == POSITION) { + os << ",[" << index_parameter[i][length[i]]; + for (j = 1;j < nb_variable;j++) { + os << "," << I_DEFAULT; + } + os << "]"; + } + + os << "]"; + if (i < nb_sequence - 1) { + os << ",\\" << endl; + os << " "; + } + } + os << "]" << endl; + break; + } + + case POSTERIOR_PROBABILITY : { + if ((posterior_probability) && (entropy) && (nb_state_sequence)) { + bool *selected_sequence; + int index , width[7]; + double max , *divergence; + ios_base::fmtflags format_flags; + + + divergence = new double[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + divergence[i] = log(nb_state_sequence[i]) - entropy[i]; + } + + width[0] = column_width(nb_sequence) + ASCII_SPACE; + width[1] = column_width(nb_sequence , posterior_probability) + ASCII_SPACE; + width[2] = column_width(nb_sequence , 
entropy) + ASCII_SPACE; + width[3] = column_width(nb_sequence , divergence) + ASCII_SPACE; + width[4] = column_width(nb_sequence , nb_state_sequence) + ASCII_SPACE; + width[5] = column_width(max_length) + ASCII_SPACE; + if (posterior_state_probability) { + width[6] = column_width(nb_sequence , posterior_state_probability) + ASCII_SPACE; + } + + selected_sequence = new bool[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + selected_sequence[i] = false; + } + + format_flags = os.setf(ios::left , ios::adjustfield); + + os << "\n" << (posterior_state_probability ? 7 : 6) << " " << STAT_word[STATW_VARIABLES] << endl; + + i = 1; + os << "\n" << STAT_word[STATW_VARIABLE] << " " << i++ << " : " << STAT_variable_word[INT_VALUE] << endl; + + if (posterior_state_probability) { + os << STAT_word[STATW_VARIABLE] << " " << i++ << " : " << STAT_variable_word[REAL_VALUE] << " "; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_POSTERIOR_INITIAL_STATE_PROBABILITY] << endl; + } + + os << STAT_word[STATW_VARIABLE] << " " << i++ << " : " << STAT_variable_word[REAL_VALUE] << " "; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_POSTERIOR_STATE_SEQUENCE_PROBABILITY] << endl; + + os << STAT_word[STATW_VARIABLE] << " " << i++ << " : " << STAT_variable_word[REAL_VALUE] << " "; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << endl; + + os << STAT_word[STATW_VARIABLE] << " " << i++ << " : " << STAT_variable_word[REAL_VALUE] << " "; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_STATE_SEQUENCE_DIVERGENCE] << endl; + + os << STAT_word[STATW_VARIABLE] << " " << i++ << " : " << STAT_variable_word[REAL_VALUE] << " "; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_NB_STATE_SEQUENCE] << endl; + + os << STAT_word[STATW_VARIABLE] << " " << i << " : " << STAT_variable_word[INT_VALUE] << " "; + if (comment_flag) { + os << "# "; + } + os << SEQ_label[SEQL_SEQUENCE_LENGTH] << endl; + + os << "\n"; + for 
(i = 0;i < nb_sequence;i++) { + +# ifdef DEBUG + for (j = 0;j < length[i];j++) { // for Fuji/Braeburn GUs + if (int_sequence[i][0][j] <= 1) { + break; + } + } + + if (j < length[i]) { + os << setw(width[1]) << posterior_probability[i] + << setw(width[2]) << entropy[i] + << setw(width[3]) << divergence[i] + << setw(width[4]) << nb_state_sequence[i] + << setw(width[5]) << length[i]; + if (comment_flag) { + os << "# "; + } + os << "(" << identifier[i] << ")" << endl; + } +# endif + + max = 0.; + + if (posterior_state_probability) { + for (j = 0;j < nb_sequence;j++) { + if ((!selected_sequence[j]) && (posterior_state_probability[j] > max)) { + max = posterior_state_probability[j]; + index = j; + } + } + } + + else { + for (j = 0;j < nb_sequence;j++) { +/* if ((!selected_sequence[j]) && (entropy[j] > max)) { + max = entropy[j]; */ + if ((!selected_sequence[j]) && (posterior_probability[j] > max)) { + max = posterior_probability[j]; + index = j; + } + } + } + + selected_sequence[index] = true; + + os << setw(width[0]) << i + 1; + + if (posterior_state_probability) { + +# ifdef MESSAGE + if (posterior_probability[index] > posterior_state_probability[index] + DOUBLE_ERROR) { + cout << "\n" << SEQ_label[SEQL_SEQUENCE] << " " << identifier[index] << ", "<< SEQ_error[SEQR_POSTERIOR_PROBABILITY_ORDER] << endl; + } +# endif + + os << setw(width[6]) << posterior_state_probability[index]; + } + + os << setw(width[1]) << posterior_probability[index] + << setw(width[2]) << entropy[index] + << setw(width[3]) << divergence[index] + << setw(width[4]) << nb_state_sequence[index] + << setw(width[5]) << length[index]; + if (comment_flag) { + os << "# "; + } + os << "(" << identifier[index] << ")" << endl; + } + + delete [] divergence; + delete [] selected_sequence; + + os.setf(format_flags , ios::adjustfield); + } + break; + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object. 
+ * + * \param[in,out] os stream, + * \param[in] format format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY), + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& Sequences::ascii_data_write(ostream &os , output_sequence_format format , + bool exhaustive) const + +{ + ascii_write(os , exhaustive , false); + ascii_print(os , format , false); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object. + * + * \param[in] format format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY), + * \param[in] exhaustive flag detail level, + * + * \return string. + */ +/*--------------------------------------------------------------*/ + +string Sequences::ascii_data_write(output_sequence_format format , bool exhaustive) const + +{ + ostringstream oss; + + + ascii_data_write(oss , format , exhaustive); + + return oss.str(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] format format (LINE/COLUMN/VECTOR/POSTERIOR_PROBABILITY), + * \param[in] exhaustive flag detail level. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Sequences::ascii_data_write(StatError &error , const string path , + output_sequence_format format , bool exhaustive) const + +{ + bool status = false; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + if (format != 'a') { + ascii_write(out_file , exhaustive , true); + } + ascii_print(out_file , format , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a Sequences object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::spreadsheet_write(StatError &error , const string path) const + +{ + bool status; + int i , j , k; + int *int_value , *pint_value; + double mean , variance , median , lower_quartile , upper_quartile , *real_value , *preal_value; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + if (index_parameter) { + out_file << SEQ_word[SEQW_INDEX_PARAMETER] << "\t" + << SEQ_index_parameter_word[index_param_type]; + } + + if (index_parameter_distribution) { + out_file << "\t\t" << SEQ_label[SEQL_MIN_INDEX_PARAMETER] << "\t" << index_parameter_distribution->offset + << "\t\t" << SEQ_label[SEQL_MAX_INDEX_PARAMETER] << "\t" << index_parameter_distribution->nb_value - 1 << endl; + + out_file << "\n" << (index_param_type == TIME ? SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + index_parameter_distribution->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << (index_param_type == TIME ? 
SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + index_parameter_distribution->spreadsheet_print(out_file); + } + + else { + out_file << endl; + } + + if (index_interval) { + out_file << "\n" << (index_param_type == TIME ? SEQ_label[SEQL_TIME_INTERVAL] : SEQ_label[SEQL_POSITION_INTERVAL]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + index_interval->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << (index_param_type == TIME ? SEQ_label[SEQL_TIME_INTERVAL] : SEQ_label[SEQL_POSITION_INTERVAL]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + index_interval->spreadsheet_print(out_file); + } + + if (index_parameter) { + out_file << "\n"; + } + out_file << nb_variable << "\t" << STAT_word[nb_variable == 1 ? STATW_VARIABLE : STATW_VARIABLES] << endl; + + for (i = 0;i < nb_variable;i++) { + out_file << "\n" << STAT_word[STATW_VARIABLE] << "\t" << i + 1 << "\t" + << STAT_variable_word[type[i]]; + + if (type[i] != AUXILIARY) { + out_file << "\t\t" << STAT_label[STATL_MIN_VALUE] << "\t" << min_value[i] + << "\t\t" << STAT_label[STATL_MAX_VALUE] << "\t" << max_value[i] << endl; + + if (marginal_distribution[i]) { + out_file << "\n" << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + marginal_distribution[i]->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << STAT_label[STATL_FREQUENCY] << endl; + marginal_distribution[i]->spreadsheet_print(out_file); + } + + else { + mean = mean_computation(i); + variance = variance_computation(i , mean); + + if (variance > 0.) 
{ + switch (type[i]) { + + case INT_VALUE : { + int_value = new int[cumul_length]; + pint_value = int_value; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + *pint_value++ = int_sequence[j][i][k]; + } + } + + lower_quartile = quantile_computation(cumul_length , int_value , 0.25); + median = quantile_computation(cumul_length , int_value , 0.5); + upper_quartile = quantile_computation(cumul_length , int_value , 0.75); + + delete [] int_value; + break; + } + + case REAL_VALUE : { + real_value = new double[cumul_length]; + preal_value = real_value; + for (j = 0;j < nb_sequence;j++) { + for (k = 0;k < length[j];k++) { + *preal_value++ = real_sequence[j][i][k]; + } + } + + lower_quartile = quantile_computation(cumul_length , real_value , 0.25); + median = quantile_computation(cumul_length , real_value , 0.5); + upper_quartile = quantile_computation(cumul_length , real_value , 0.75); + + delete [] real_value; + break; + } + } + } + + else { + median = mean; + } + + out_file << "\n" << STAT_label[STATL_SAMPLE_SIZE] << "\t" << cumul_length << endl; + + out_file << STAT_label[STATL_MEAN] << "\t" << mean << "\t\t" + << STAT_label[STATL_MEDIAN] << "\t" << median << endl; + + out_file << STAT_label[STATL_VARIANCE] << "\t" << variance << "\t\t" + << STAT_label[STATL_STANDARD_DEVIATION] << "\t" << sqrt(variance); + if (variance > 0.) { + out_file << "\t\t" << STAT_label[STATL_LOWER_QUARTILE] << "\t" << lower_quartile + << "\t\t" << STAT_label[STATL_UPPER_QUARTILE] << "\t" << upper_quartile; + } + out_file << endl; + + if (variance > 0.) 
{ + out_file << STAT_label[STATL_SKEWNESS_COEFF] << "\t" << skewness_computation(i , mean , variance) << "\t\t" + << STAT_label[STATL_KURTOSIS_COEFF] << "\t" << kurtosis_computation(i , mean , variance) << endl; + } + + if (marginal_histogram[i]) { + out_file << "\n" << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM] << endl; + out_file << "\n" << STAT_label[STATL_VALUE] << "\t" << STAT_label[STATL_FREQUENCY] << endl; + marginal_histogram[i]->spreadsheet_print(out_file); + } + } + } + + else { + out_file << endl; + } + } + + out_file << "\n" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + length_distribution->spreadsheet_characteristic_print(out_file); + + out_file << "\n\t" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + length_distribution->spreadsheet_print(out_file); + + out_file << "\n" << SEQ_label[SEQL_CUMUL_LENGTH] << "\t" << cumul_length << endl; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a Sequences object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Sequences::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status; + int i , j; + int nb_histo; + const FrequencyDistribution *phisto[2]; + ostringstream *data_file_name; + + + error.init(); + + // writing of the data files + + data_file_name = new ostringstream[nb_variable + 1]; + data_file_name[0] << prefix << 0 << ".dat"; + + nb_histo = 0; + + if (index_parameter_distribution) { + phisto[nb_histo++] = index_parameter_distribution; + } + if (index_interval) { + phisto[nb_histo++] = index_interval; + } + + status = length_distribution->plot_print((data_file_name[0].str()).c_str() , nb_histo , phisto); + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + else { + for (i = 0;i < nb_variable;i++) { + if (marginal_distribution[i]) { + data_file_name[i + 1] << prefix << i + 1 << ".dat"; + marginal_distribution[i]->plot_print((data_file_name[i + 1].str()).c_str()); + } + else if (marginal_histogram[i]) { + data_file_name[i + 1] << prefix << i + 1 << ".dat"; + marginal_histogram[i]->plot_print((data_file_name[i + 1].str()).c_str()); + } + } + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + j = 2; + + if (index_parameter_distribution) { + if (index_parameter_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } 
+ if ((int)(index_parameter_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [" << index_parameter_distribution->offset << ":" + << index_parameter_distribution->nb_value - 1 << "] [0:" + << (int)(index_parameter_distribution->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ << " title \"" + << (index_param_type == TIME ? SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (index_parameter_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(index_parameter_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (index_interval) { + if (index_interval->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(index_interval->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << index_interval->nb_value - 1 << "] [0:" + << (int)(index_interval->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ << " title \"" + << (index_param_type == TIME ? 
SEQ_label[SEQL_TIME_INTERVAL] : SEQ_label[SEQL_POSITION_INTERVAL]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (index_interval->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(index_interval->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + for (j = 0;j < nb_variable;j++) { + if (marginal_distribution[j]) { + if (marginal_distribution[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(marginal_distribution[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << MAX(marginal_distribution[j]->nb_value - 1 , 1) << "] [0:" + << (int)(marginal_distribution[j]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[j + 1].str()).c_str()) << "\" using 1 title \""; + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << j + 1 << " - "; + } + out_file << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (marginal_distribution[j]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(marginal_distribution[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + else if (marginal_histogram[j]) { + if ((int)(marginal_histogram[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [" << marginal_histogram[j]->min_value - marginal_histogram[j]->bin_width << ":" + << marginal_histogram[j]->max_value + marginal_histogram[j]->bin_width << "] [0:" + << (int)(marginal_histogram[j]->max * YSCALE) + 1 << 
"] \"" + << label((data_file_name[j + 1].str()).c_str()) << "\" using 1:2 title \"" + << STAT_label[STATL_VARIABLE] << " " << j + 1 << " - " + << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM] + << "\" with histeps" << endl; + + if ((int)(marginal_histogram[j]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + } + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_distribution->nb_value - 1 << "] [0:" + << (int)(length_distribution->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using 1 title \"" + << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (length_distribution->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_distribution->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + + delete [] data_file_name; + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a Sequences object. + * + * \return MultiPlotSet object. 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* Sequences::get_plotable() const + +{ + int i , j; + int nb_plot_set; + ostringstream legend; + MultiPlotSet *plot_set; + + + nb_plot_set = 1; + if (index_parameter_distribution) { + nb_plot_set++; + } + if (index_interval) { + nb_plot_set++; + } + for (i = 0;i < nb_variable;i++) { + if ((marginal_distribution[i]) || (marginal_histogram[i])) { + nb_plot_set++; + } + } + + plot_set = new MultiPlotSet(nb_plot_set); + + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + i = 0; + + if (index_parameter_distribution) { + + // index parameter frequency distribution + + plot[i].xrange = Range(index_parameter_distribution->offset , index_parameter_distribution->nb_value - 1); + plot[i].yrange = Range(0 , ceil(index_parameter_distribution->max * YSCALE)); + + if (index_parameter_distribution->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(index_parameter_distribution->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + legend << (index_param_type == TIME ? SEQ_label[SEQL_TIME] : SEQ_label[SEQL_POSITION]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + index_parameter_distribution->plotable_frequency_write(plot[i][0]); + i++; + } + + if (index_interval) { + + // between-index interval frequency distribution + + plot[i].xrange = Range(0 , index_interval->nb_value - 1); + plot[i].yrange = Range(0 , ceil(index_interval->max * YSCALE)); + + if (index_interval->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(index_interval->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + legend << (index_param_type == TIME ? 
SEQ_label[SEQL_TIME_INTERVAL] : SEQ_label[SEQL_POSITION_INTERVAL]) + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + index_interval->plotable_frequency_write(plot[i][0]); + i++; + } + + for (j = 0;j < nb_variable;j++) { + if (marginal_distribution[j]) { + + // marginal frequency distribution + + plot[i].xrange = Range(0 , MAX(marginal_distribution[j]->nb_value - 1 , 1)); + plot[i].yrange = Range(0 , ceil(marginal_distribution[j]->max * YSCALE)); + + if (marginal_distribution[j]->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(marginal_distribution[j]->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + if (nb_variable > 1) { + legend << STAT_label[STATL_VARIABLE] << " " << j + 1 << " - "; + } + legend << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + marginal_distribution[j]->plotable_frequency_write(plot[i][0]); + i++; + } + + else if (marginal_histogram[j]) { + + // marginal histogram + + plot[i].xrange = Range(marginal_histogram[j]->min_value - marginal_histogram[j]->bin_width , + marginal_histogram[j]->max_value + marginal_histogram[j]->bin_width); + plot[i].yrange = Range(0 , ceil(marginal_histogram[j]->max * YSCALE)); + + if (ceil(marginal_histogram[j]->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + legend << STAT_label[STATL_VARIABLE] << " " << j + 1 << " " + << STAT_label[STATL_MARGINAL] << " " << STAT_label[STATL_HISTOGRAM]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "histeps"; + + marginal_histogram[j]->plotable_write(plot[i][0]); + i++; + } + } + + // sequence length frequency distribution + + plot[i].xrange = Range(0 , length_distribution->nb_value - 1); + plot[i].yrange = Range(0 , ceil(length_distribution->max * YSCALE)); + + if 
(length_distribution->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(length_distribution->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + length_distribution->plotable_frequency_write(plot[i][0]); + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of sequences at the Gnuplot format. + * + * \param[in] path file path, + * \param[in] ilength sequence length. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool Sequences::plot_print(const char *path , int ilength) const + +{ + bool status = false; + int i , j , k; + int length_nb_sequence , *index , *plength; + ofstream out_file(path); + + + if (out_file) { + status = true; + + index = new int[nb_sequence]; + + length_nb_sequence = 0; + plength = length; + for (i = 0;i < nb_sequence;i++) { + if (*plength++ == ilength) { + index[length_nb_sequence++] = i; + } + } + + for (i = 0;i < ilength;i++) { + for (j = 0;j < length_nb_sequence;j++) { + if (index_parameter) { + out_file << index_parameter[index[j]][i] << " "; + } + for (k = 0;k < nb_variable;k++) { + if ((type[k] != REAL_VALUE) && (type[k] != AUXILIARY)) { + out_file << int_sequence[index[j]][k][i] << " "; + } + else { + out_file << real_sequence[index[j]][k][i] << " "; + } + } + } + out_file << endl; + } + + delete [] index; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a Sequences object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool Sequences::plot_data_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status; + int i , j , k; + int min_index_parameter , max_index_parameter , *pfrequency , *length_nb_sequence; + double min , max; + ostringstream *data_file_name; + + + error.init(); + + if (nb_sequence > PLOT_NB_SEQUENCE) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + + // writing of the data file + + data_file_name = new ostringstream[length_distribution->nb_value]; + + data_file_name[length_distribution->offset] << prefix << length_distribution->offset << ".dat"; + status = plot_print((data_file_name[length_distribution->offset].str()).c_str() , length_distribution->offset); + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + else { + pfrequency = length_distribution->frequency + length_distribution->offset + 1; + for (i = length_distribution->offset + 1;i < length_distribution->nb_value;i++) { + if (*pfrequency++ > 0) { + data_file_name[i] << prefix << i << ".dat"; + plot_print((data_file_name[i].str()).c_str() , i); + } + } + + length_nb_sequence = new int[length_distribution->nb_value]; + + if (index_parameter) { + min_index_parameter = index_parameter_distribution->offset; + max_index_parameter = max_index_parameter_computation(true); + } + + // writing of the script files + + for (i = 0;i < 2;i++) { + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n"; + + for (j = 0;j < nb_variable;j++) { + for 
(k = 0;k < length_distribution->nb_value;k++) { + length_nb_sequence[k] = 0; + } + + out_file << "set title \""; + if (title) { + out_file << title; + if (nb_variable > 1) { + out_file << " - "; + } + } + + if (nb_variable > 1) { + out_file << STAT_label[STATL_VARIABLE] << " " << j + 1; + } + out_file << "\"\n\n"; + + min = min_value[j]; + max = max_value[j]; + + if (max == min) { + max = min + 1; + } + if ((j + 1 < nb_variable) && (type[j + 1] == AUXILIARY)) { + if (min_value[j + 1] < min) { + min = min_value[j + 1]; + } + if (max_value[j + 1] > max) { + max = max_value[j + 1]; + } + } + + if (index_parameter) { + if (max_index_parameter - min_index_parameter < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if (max - min < TIC_THRESHOLD) { + out_file << "set ytics " << MIN(min , 0) << ",1" << endl; + } + + out_file << "plot [" << min_index_parameter << ":" << max_index_parameter << "] [" + << MIN(min , 0) << ":" << (max >= 0. ? max * YSCALE : max * (2. - YSCALE)) << "] "; + for (k = 0;k < nb_sequence;k++) { + out_file << "\"" << label((data_file_name[length[k]].str()).c_str()) << "\" using " + << length_nb_sequence[length[k]] * (nb_variable + 1) + 1 << " : " + << length_nb_sequence[length[k]] * (nb_variable + 1) + j + 2; + if (nb_sequence <= PLOT_LEGEND_NB_SEQUENCE) { + out_file << " title \"" << identifier[k] << "\" with linespoints"; + } + else { + out_file << " notitle with linespoints"; + } + + if ((j + 1 < nb_variable) && (type[j + 1] == AUXILIARY)) { + out_file << ",\\" << endl; + out_file << "\"" << label((data_file_name[length[k]].str()).c_str()) << "\" using " + << length_nb_sequence[length[k]] * (nb_variable + 1) + 1 << " : " + << length_nb_sequence[length[k]] * (nb_variable + 1) + j + 3; + if (nb_sequence <= PLOT_LEGEND_NB_SEQUENCE) { + out_file << " title \"" << identifier[k] << "\" with lines"; + } + else { + out_file << " notitle with lines"; + } + } + + if (k < nb_sequence - 1) { + out_file << ",\\"; + } + out_file << endl; + 
length_nb_sequence[length[k]]++; + } + + if (max_index_parameter - min_index_parameter < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if (max_value[j] - min_value[j] < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + else { + if (max_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if (max_value[j] - min_value[j] < TIC_THRESHOLD) { + out_file << "set ytics " << MIN(min_value[j] , 0) << ",1" << endl; + } + + out_file << "plot [0:" << max_length - 1 << "] [" << MIN(min , 0) + << ":" << (max >= 0. ? max * YSCALE : max * (2. - YSCALE)) << "] "; + for (k = 0;k < nb_sequence;k++) { + out_file << "\"" << label((data_file_name[length[k]].str()).c_str()) << "\" using " + << length_nb_sequence[length[k]] * nb_variable + j + 1; + if (nb_sequence <= PLOT_LEGEND_NB_SEQUENCE) { + out_file << " title \"" << identifier[k] << "\" with linespoints"; + } + else { + out_file << " notitle with linespoints"; + } + + if ((j + 1 < nb_variable) && (type[j + 1] == AUXILIARY)) { + out_file << ",\\" << endl; + out_file << "\"" << label((data_file_name[length[k]].str()).c_str()) << "\" using " + << length_nb_sequence[length[k]] * nb_variable + j + 2; + if (nb_sequence <= PLOT_LEGEND_NB_SEQUENCE) { + out_file << " title \"" << identifier[k] << "\" with lines"; + } + else { + out_file << " notitle with lines"; + } + } + + if (k < nb_sequence - 1) { + out_file << ",\\"; + } + out_file << endl; + length_nb_sequence[length[k]]++; + } + + if (max_length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if (max_value[j] - min_value[j] - 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if ((j + 1 < nb_variable) && (type[j + 1] == AUXILIARY)) { + j++; + } + + if ((i == 0) && (((type[nb_variable - 1] != AUXILIARY) && (j < nb_variable - 1)) || + ((type[nb_variable - 1] == AUXILIARY) && (j < nb_variable - 2)))) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << 
endl;
+          }
+          out_file << endl;
+        }
+
+        if (i == 1) {
+          out_file << "\nset terminal x11" << endl;
+        }
+
+        out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl;
+      }
+
+      delete [] length_nb_sequence;
+    }
+
+    delete [] data_file_name;
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a Sequences object.
+ *
+ *  One view is built per variable that is not an auxiliary variable; when the
+ *  following variable is auxiliary, its values are drawn in the same view as
+ *  an additional "lines" curve per sequence.
+ *
+ *  \param[in] error reference on a StatError object.
+ *
+ *  \return          MultiPlotSet object (NULL if there are more than
+ *                   PLOT_NB_SEQUENCE sequences).
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* Sequences::get_plotable_data(StatError &error) const
+
+{
+  error.init();
+
+  // too many sequences: report the error and build nothing
+
+  if (nb_sequence > PLOT_NB_SEQUENCE) {
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+    return NULL;
+  }
+
+  // one view per non-auxiliary variable
+
+  int nb_plot_set = 0;
+  for (int var = 0;var < nb_variable;var++) {
+    if (type[var] != AUXILIARY) {
+      nb_plot_set++;
+    }
+  }
+
+  MultiPlotSet *plot_set = new MultiPlotSet(nb_plot_set);
+  MultiPlotSet &plot = *plot_set;
+
+  plot.border = "15 lw 0";
+
+  // x-axis bounds, only meaningful when explicit index parameters exist
+
+  int min_index_parameter , max_index_parameter;
+  if (index_parameter) {
+    min_index_parameter = index_parameter_distribution->offset;
+    max_index_parameter = max_index_parameter_computation(true);
+  }
+
+  const bool show_legend = (nb_sequence <= PLOT_LEGEND_NB_SEQUENCE);
+  ostringstream title , legend;
+  int view = 0;
+
+  for (int var = 0;var < nb_variable;var++) {
+    if (type[var] == AUXILIARY) {
+      continue;
+    }
+
+    const bool has_auxiliary = ((var + 1 < nb_variable) && (type[var + 1] == AUXILIARY));
+
+    if (nb_variable > 1) {
+      title.str("");
+      title << STAT_label[STATL_VARIABLE] << " " << view + 1;
+      plot[view].title = title.str();
+    }
+
+    // y-axis bounds, widened to include the auxiliary variable if any
+
+    double lower = min_value[var];
+    double upper = max_value[var];
+
+    if (upper == lower) {
+      upper = lower + 1;
+    }
+    if (has_auxiliary) {
+      if (min_value[var + 1] < lower) {
+        lower = min_value[var + 1];
+      }
+      if (max_value[var + 1] > upper) {
+        upper = max_value[var + 1];
+      }
+    }
+
+    plot[view].yrange = Range(MIN(lower , 0) , (upper >= 0. ? upper * YSCALE : upper * (2. - YSCALE)));
+    if (upper - lower < TIC_THRESHOLD) {
+      plot[view].ytics = 1;
+    }
+
+    plot[view].resize(has_auxiliary ? nb_sequence * 2 : nb_sequence);
+
+    // x-axis: explicit index parameters if available, otherwise the rank in the sequence
+
+    if (index_parameter) {
+      plot[view].xrange = Range(min_index_parameter , max_index_parameter);
+      if (max_index_parameter - min_index_parameter < TIC_THRESHOLD) {
+        plot[view].xtics = 1;
+      }
+    }
+    else {
+      plot[view].xrange = Range(0 , max_length - 1);
+      if (max_length - 1 < TIC_THRESHOLD) {
+        plot[view].xtics = 1;
+      }
+    }
+
+    int curve = 0;
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      plot[view][curve].style = "linespoints";
+
+      if (show_legend) {
+        legend.str("");
+        legend << identifier[seq];
+        plot[view][curve].legend = legend.str();
+      }
+
+      if (type[var] != REAL_VALUE) {
+        for (int pos = 0;pos < length[seq];pos++) {
+          plot[view][curve].add_point((index_parameter ? index_parameter[seq][pos] : pos) ,
+                                      int_sequence[seq][var][pos]);
+        }
+      }
+
+      else {
+        for (int pos = 0;pos < length[seq];pos++) {
+          plot[view][curve].add_point((index_parameter ? index_parameter[seq][pos] : pos) ,
+                                      real_sequence[seq][var][pos]);
+        }
+      }
+      curve++;
+
+      if (has_auxiliary) {
+        plot[view][curve].style = "lines";
+
+        if (show_legend) {
+          legend.str("");
+          legend << identifier[seq];
+          plot[view][curve].legend = legend.str();
+        }
+
+        for (int pos = 0;pos < length[seq];pos++) {
+          plot[view][curve].add_point((index_parameter ? index_parameter[seq][pos] : pos) ,
+                                      real_sequence[seq][var + 1][pos]);
+        }
+
+        curve++;
+      }
+    }
+
+    view++;
+  }
+
+  return plot_set;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the maximum length of sequences.
+ *
+ *  The result is stored in max_length; length[0] is read unconditionally.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::max_length_computation()
+
+{
+  int longest = length[0];
+
+  for (int seq = 1;seq < nb_sequence;seq++) {
+    if (length[seq] > longest) {
+      longest = length[seq];
+    }
+  }
+
+  max_length = longest;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the cumulative length of sequences.
+ *
+ *  The result is stored in cumul_length.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::cumul_length_computation()
+
+{
+  int total = 0;
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    total += length[seq];
+  }
+
+  cumul_length = total;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the sequence length frequency distribution.
+ *
+ *  A new FrequencyDistribution is allocated and assigned to
+ *  length_distribution; max_length is used to size it.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::build_length_frequency_distribution()
+
+{
+  FrequencyDistribution *distribution = new FrequencyDistribution(max_length + 1);
+
+  length_distribution = distribution;
+
+  distribution->nb_element = nb_sequence;
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    (distribution->frequency[length[seq]])++;
+  }
+
+  // characteristics of the frequency distribution
+
+  distribution->nb_value_computation();
+  distribution->offset_computation();
+  distribution->max_computation();
+  distribution->mean_computation();
+  distribution->variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of index parameters from between-index intervals.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::index_parameter_computation()
+
+{
+  // nothing to do unless the index parameters are stored as intervals
+
+  if ((index_param_type != TIME_INTERVAL) && (index_param_type != POSITION_INTERVAL)) {
+    return;
+  }
+
+  const bool position = (index_param_type == POSITION_INTERVAL);
+
+  if (position) {
+    index_param_type = POSITION;
+  }
+  else {
+    index_param_type = TIME;
+  }
+
+  // cumulative sum of the intervals; the POSITION case covers length + 1 entries
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    const int nb_index = (position ? length[seq] + 1 : length[seq]);
+
+    for (int rank = 1;rank < nb_index;rank++) {
+      index_parameter[seq][rank] += index_parameter[seq][rank - 1];
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the minimum value taken by the index parameter.
+ *
+ *  Only the first index parameter of each sequence is examined.
+ *
+ *  \return minimum index parameter (I_DEFAULT if there is no index parameter).
+ */
+/*--------------------------------------------------------------*/
+
+int Sequences::min_index_parameter_computation() const
+
+{
+  if (!index_parameter) {
+    return I_DEFAULT;
+  }
+
+  int smallest = index_parameter[0][0];
+
+  for (int seq = 1;seq < nb_sequence;seq++) {
+    const int first = index_parameter[seq][0];
+    if (first < smallest) {
+      smallest = first;
+    }
+  }
+
+  return smallest;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the maximum value taken by the index parameter.
+ *
+ *  \param[in] last_position flag last position.
+ */
+/*--------------------------------------------------------------*/
+
+int Sequences::max_index_parameter_computation(bool last_position) const
+
+{
+  if (!index_parameter) {
+    return I_DEFAULT;
+  }
+
+  // the entry at rank length - 1 is read for TIME or when last_position is set,
+  // the entry at rank length otherwise
+
+  const int shift = (((index_param_type == TIME) || (last_position)) ? 1 : 0);
+
+  int largest = index_parameter[0][length[0] - shift];
+
+  for (int seq = 1;seq < nb_sequence;seq++) {
+    const int last = index_parameter[seq][length[seq] - shift];
+    if (last > largest) {
+      largest = last;
+    }
+  }
+
+  return largest;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the index parameter frequency distribution.
+ *
+ *  Does nothing when there is no index parameter; otherwise a new
+ *  FrequencyDistribution is allocated and assigned to index_parameter_distribution.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::build_index_parameter_frequency_distribution()
+
+{
+  if (!index_parameter) {
+    return;
+  }
+
+  const bool position = (index_param_type == POSITION);
+
+  index_parameter_distribution = new FrequencyDistribution(max_index_parameter_computation() + 1);
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    const int nb_index = (position ? length[seq] + 1 : length[seq]);
+
+    for (int rank = 0;rank < nb_index;rank++) {
+      (index_parameter_distribution->frequency[index_parameter[seq][rank]])++;
+    }
+  }
+
+  index_parameter_distribution->offset_computation();
+
+  // one extra index parameter per sequence is counted in the POSITION case
+
+  index_parameter_distribution->nb_element = cumul_length;
+  if (position) {
+    index_parameter_distribution->nb_element += nb_sequence;
+  }
+
+  index_parameter_distribution->max_computation();
+  index_parameter_distribution->mean_computation();
+  index_parameter_distribution->variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of the frequency distribution of between-index intervals.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::index_interval_computation()
+
+{
+  // intervals are only defined for TIME and POSITION index parameters
+
+  if ((index_param_type != TIME) && (index_param_type != POSITION)) {
+    return;
+  }
+
+  index_interval = new FrequencyDistribution(max_index_parameter_computation(true) + 1);
+
+  // constitution of the frequency distribution of between-index intervals
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    const int *index_param = index_parameter[seq];
+
+    for (int rank = 1;rank < length[seq];rank++) {
+      const int interval = index_param[rank] - index_param[rank - 1];
+      (index_interval->frequency[interval])++;
+    }
+  }
+
+  index_interval->nb_value_computation();
+  index_interval->offset_computation();
+
+  // each sequence of length n contributes n - 1 intervals
+
+  index_interval->nb_element = cumul_length - nb_sequence;
+  index_interval->max_computation();
+  index_interval->mean_computation();
+  index_interval->variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of the frequency distribution of between-index intervals
+ *         of a value of an integer-valued variable.
+ *
+ *  \param[in] error    reference on a StatError object,
+ *  \param[in] variable variable index,
+ *  \param[in] value    value.
+ *
+ *  \return             FrequencyDistribution object.
+ */
+/*--------------------------------------------------------------*/
+
+FrequencyDistribution* Sequences::value_index_interval_computation(StatError &error , int variable ,
+                                                                   int value) const
+
+{
+  bool status = true;
+
+
+  error.init();
+
+  if ((index_param_type != TIME) && (index_param_type != POSITION)) {
+    status = false;
+    error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]);
+  }
+
+  if ((variable < 1) || (variable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    variable--;  // from the 1-based argument to the 0-based array index
+
+    const FrequencyDistribution *marginal = marginal_distribution[variable];
+
+    if (!marginal) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": "
+                    << STAT_error[STATR_MARGINAL_FREQUENCY_DISTRIBUTION];
+      error.update((error_message.str()).c_str());
+    }
+
+    // the value should be observed at least twice to define an interval
+
+    else if ((value < marginal->offset) || (value >= marginal->nb_value) ||
+             (marginal->frequency[value] <= 1)) {
+      status = false;
+      error.update(SEQ_error[SEQR_VALUE]);
+    }
+  }
+
+  if (!status) {
+    return NULL;
+  }
+
+  FrequencyDistribution *value_index_interval =
+    new FrequencyDistribution(max_index_parameter_computation(true) + 1);
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    int previous_index_param = I_DEFAULT;  // no occurrence of value seen yet in this sequence
+
+    for (int rank = 0;rank < length[seq];rank++) {
+      if (int_sequence[seq][variable][rank] == value) {
+        const int current_index_param = index_parameter[seq][rank];
+
+        if (previous_index_param != I_DEFAULT) {
+          (value_index_interval->frequency[current_index_param - previous_index_param])++;
+        }
+        previous_index_param = current_index_param;
+      }
+    }
+  }
+
+  // computation of the frequency distribution characteristics
+
+  value_index_interval->nb_value_computation();
+  value_index_interval->offset_computation();
+  value_index_interval->nb_element_computation();
+
+  if (value_index_interval->nb_element == 0) {
+    delete value_index_interval;
+    error.update(STAT_error[STATR_EMPTY_SAMPLE]);
+    return NULL;
+  }
+
+  value_index_interval->max_computation();
+  value_index_interval->mean_computation();
+  value_index_interval->variance_computation();
+
+  return value_index_interval;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the minimum value taken by a variable.
+ *
+ *  The result is stored in min_value[variable].
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::min_value_computation(int variable)
+
+{
+  double lowest;
+
+
+  // integer-valued variable
+
+  if ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY)) {
+    lowest = int_sequence[0][variable][0];
+
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      for (int rank = 0;rank < length[seq];rank++) {
+        if (int_sequence[seq][variable][rank] < lowest) {
+          lowest = int_sequence[seq][variable][rank];
+        }
+      }
+    }
+  }
+
+  // real-valued or auxiliary variable
+
+  else {
+    lowest = real_sequence[0][variable][0];
+
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      for (int rank = 0;rank < length[seq];rank++) {
+        if (real_sequence[seq][variable][rank] < lowest) {
+          lowest = real_sequence[seq][variable][rank];
+        }
+      }
+    }
+  }
+
+  min_value[variable] = lowest;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the maximum value taken by a variable.
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::max_value_computation(int variable)
+
+{
+  double highest;
+
+
+  // integer-valued variable
+
+  if ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY)) {
+    highest = int_sequence[0][variable][0];
+
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      for (int rank = 0;rank < length[seq];rank++) {
+        if (int_sequence[seq][variable][rank] > highest) {
+          highest = int_sequence[seq][variable][rank];
+        }
+      }
+    }
+  }
+
+  // real-valued or auxiliary variable
+
+  else {
+    highest = real_sequence[0][variable][0];
+
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      for (int rank = 0;rank < length[seq];rank++) {
+        if (real_sequence[seq][variable][rank] > highest) {
+          highest = real_sequence[seq][variable][rank];
+        }
+      }
+    }
+  }
+
+  max_value[variable] = highest;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the marginal frequency distribution for
+ *         a positive integer-valued variable.
+ *
+ *  The already allocated marginal_distribution[variable] is reset and refilled.
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::marginal_frequency_distribution_computation(int variable)
+
+{
+  FrequencyDistribution *marginal = marginal_distribution[variable];
+
+
+  // reset of the frequencies
+
+  for (int val = 0;val < marginal->nb_value;val++) {
+    marginal->frequency[val] = 0;
+  }
+
+  // counting of the observed values
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    const int *values = int_sequence[seq][variable];
+
+    for (int rank = 0;rank < length[seq];rank++) {
+      (marginal->frequency[values[rank]])++;
+    }
+  }
+
+  marginal->offset = (int)min_value[variable];
+  marginal->nb_element_computation();
+  marginal->max_computation();
+  marginal->mean_computation();
+  marginal->variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the marginal frequency distribution for
+ *         a positive integer-valued variable
+ *
+ * 
\param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::build_marginal_frequency_distribution(int variable)
+
+{
+  // no marginal distribution for an auxiliary variable
+
+  if (type[variable] == AUXILIARY) {
+    return;
+  }
+
+  const bool frequency_distribution = ((type[variable] != REAL_VALUE) && (min_value[variable] >= 0) &&
+                                       (max_value[variable] <= MARGINAL_DISTRIBUTION_MAX_VALUE));
+
+  // real-valued, negative or too large values: a histogram is built instead
+
+  if (!frequency_distribution) {
+    build_marginal_histogram(variable);
+    return;
+  }
+
+#ifdef DEBUG
+  assert(marginal_distribution != NULL);
+#endif
+
+  // deleting a null pointer has no effect, hence no explicit test
+
+  delete marginal_distribution[variable];
+  marginal_distribution[variable] = new FrequencyDistribution((int)max_value[variable] + 1);
+  marginal_frequency_distribution_computation(variable);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the marginal histogram for a variable.
+ *
+ *  Nothing is done when a histogram with the requested bin width already
+ *  exists and no minimum value is imposed (imin_value == D_INF).
+ *
+ *  \param[in] variable   variable index,
+ *  \param[in] bin_width  bin width (D_DEFAULT: computed from the range of values),
+ *  \param[in] imin_value minimum value (D_INF: computed from min_value and bin_width).
+ */
+/*--------------------------------------------------------------*/
+
+void Sequences::build_marginal_histogram(int variable , double bin_width , double imin_value)
+
+{
+  if ((marginal_histogram[variable]) && (bin_width == marginal_histogram[variable]->bin_width) &&
+      (imin_value == D_INF)) {
+    return;
+  }
+
+  // construction of the histogram
+
+  if (bin_width == D_DEFAULT) {
+    bin_width = MAX(::round((max_value[variable] - min_value[variable]) * HISTOGRAM_FREQUENCY / cumul_length) , 1);
+
+#   ifdef MESSAGE
+    cout << "\n" << STAT_label[STATL_VARIABLE] << " " << variable + 1 << " - "
+         << STAT_label[STATL_BIN_WIDTH] << ": " << bin_width << endl;
+#   endif
+
+  }
+# ifdef DEBUG
+  assert(marginal_histogram != NULL);
+# endif
+
+  if (imin_value == D_INF) {
+    imin_value = floor(min_value[variable] / bin_width) * bin_width;
+  }
+
+  const int nb_bin = (int)floor((max_value[variable] - imin_value) / bin_width) + 1;
+
+  // an existing histogram is resized, otherwise a new one is created
+
+  if (marginal_histogram[variable]) {
+    marginal_histogram[variable]->nb_bin = nb_bin;
+
+    delete [] marginal_histogram[variable]->frequency;
+    marginal_histogram[variable]->frequency = new int[marginal_histogram[variable]->nb_bin];
+  }
+
+  else {
+    marginal_histogram[variable] = new Histogram(nb_bin , false);
+
+    marginal_histogram[variable]->nb_element = cumul_length;
+    marginal_histogram[variable]->type = type[variable];
+  }
+
+  Histogram *histogram = marginal_histogram[variable];
+
+  histogram->bin_width = bin_width;
+  histogram->min_value = imin_value;
+  histogram->max_value = ceil(max_value[variable] / bin_width) * bin_width;
+
+  // computation of bin frequencies
+
+  for (int bin = 0;bin < histogram->nb_bin;bin++) {
+    histogram->frequency[bin] = 0;
+  }
+
+  if ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY)) {
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      for (int rank = 0;rank < length[seq];rank++) {
+        (histogram->frequency[(int)floor((int_sequence[seq][variable][rank] - imin_value) / bin_width)])++;
+      }
+    }
+  }
+
+  else {
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      for (int rank = 0;rank < length[seq];rank++) {
+        (histogram->frequency[(int)floor((real_sequence[seq][variable][rank] - imin_value) / bin_width)])++;
+      }
+    }
+  }
+
+  histogram->max_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Change of the bin width of the marginal histogram.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] variable   variable index,
+ *  \param[in] bin_width  bin width,
+ *  \param[in] imin_value minimum value.
+ *
+ *  \return               error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool Sequences::select_bin_width(StatError &error , int variable ,
+                                 double bin_width , double imin_value)
+
+{
+  bool status = true;
+
+
+  error.init();
+
+  if ((variable < 1) || (variable > nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    variable--;  // from the 1-based argument to the 0-based array index
+
+    const bool integer_valued = (type[variable] != REAL_VALUE);
+
+    if (!marginal_histogram[variable]) {
+      status = false;
+      error.update(STAT_error[STATR_MARGINAL_HISTOGRAM]);
+    }
+
+    // strictly positive bin width, integer-valued unless the variable is real-valued
+
+    if ((bin_width <= 0.) || ((integer_valued) && ((int)bin_width != bin_width))) {
+      status = false;
+      error.update(STAT_error[STATR_HISTOGRAM_BIN_WIDTH]);
+    }
+
+    // an imposed minimum value should lie in ]min_value - bin_width, min_value]
+
+    if (imin_value != D_INF) {
+      if ((imin_value <= min_value[variable] - bin_width) || (imin_value > min_value[variable]) ||
+          ((integer_valued) && ((int)imin_value != imin_value))) {
+        status = false;
+        error.update(STAT_error[STATR_HISTOGRAM_MIN_VALUE]);
+      }
+    }
+  }
+
+  if (!status) {
+    return false;
+  }
+
+  build_marginal_histogram(variable , bin_width , imin_value);
+
+  return true;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Mean computation for a variable.
+ *
+ *  \param[in] variable variable index.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::mean_computation(int variable) const
+
+{
+  // the mean stored in the marginal frequency distribution is used when available
+
+  if (marginal_distribution[variable]) {
+    return marginal_distribution[variable]->mean;
+  }
+
+  const bool integer_valued = ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY));
+  double total = 0.;
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    for (int rank = 0;rank < length[seq];rank++) {
+      if (integer_valued) {
+        total += int_sequence[seq][variable][rank];
+      }
+      else {
+        total += real_sequence[seq][variable][rank];
+      }
+    }
+  }
+
+  return total / cumul_length;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Variance computation for a variable.
+ *
+ *  The sum of squared deviations is divided by cumul_length - 1;
+ *  0 is returned when cumul_length <= 1.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] mean     mean.
+ *
+ *  \return             variance.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::variance_computation(int variable , double mean) const
+
+{
+  // the variance stored in the marginal frequency distribution is used when available
+
+  if (marginal_distribution[variable]) {
+    return marginal_distribution[variable]->variance;
+  }
+
+  if (cumul_length <= 1) {
+    return 0.;
+  }
+
+  const bool integer_valued = ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY));
+  double diff;
+  long double square_sum = 0.;
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    for (int rank = 0;rank < length[seq];rank++) {
+      if (integer_valued) {
+        diff = int_sequence[seq][variable][rank] - mean;
+      }
+      else {
+        diff = real_sequence[seq][variable][rank] - mean;
+      }
+      square_sum += diff * diff;
+    }
+  }
+
+  double variance = square_sum / (cumul_length - 1);
+
+  return variance;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the mean absolute deviation for a variable.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] location location measure (e.g. mean or median).
+ *
+ *  \return             mean absolute deviation.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::mean_absolute_deviation_computation(int variable , double location) const
+
+{
+  // delegated to the marginal frequency distribution when available
+
+  if (marginal_distribution[variable]) {
+    return marginal_distribution[variable]->mean_absolute_deviation_computation(location);
+  }
+
+  const bool integer_valued = ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY));
+  double deviation_sum = 0.;
+
+  for (int seq = 0;seq < nb_sequence;seq++) {
+    for (int rank = 0;rank < length[seq];rank++) {
+      if (integer_valued) {
+        deviation_sum += fabs(int_sequence[seq][variable][rank] - location);
+      }
+      else {
+        deviation_sum += fabs(real_sequence[seq][variable][rank] - location);
+      }
+    }
+  }
+
+  return deviation_sum / cumul_length;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the mean absolute difference for a variable.
+ *
+ *  \param[in] variable variable index.
+ *
+ *  \return             mean absolute difference.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::mean_absolute_difference_computation(int variable) const
+
+{
+  // at least two values are required to form a pair
+
+  if (cumul_length <= 1) {
+    return 0.;
+  }
+
+  double difference_sum = 0.;
+
+  // integer-valued variable
+
+  if ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY)) {
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      const int *current = int_sequence[seq][variable];
+
+      // pairs of values within the same sequence
+
+      for (int first = 0;first < length[seq];first++) {
+        for (int second = first + 1;second < length[seq];second++) {
+          difference_sum += abs(current[first] - current[second]);
+        }
+      }
+
+      // pairs of values between this sequence and each following sequence
+
+      for (int other = seq + 1;other < nb_sequence;other++) {
+        for (int first = 0;first < length[seq];first++) {
+          for (int second = 0;second < length[other];second++) {
+            difference_sum += abs(current[first] - int_sequence[other][variable][second]);
+          }
+        }
+      }
+    }
+  }
+
+  // real-valued or auxiliary variable
+
+  else {
+    for (int seq = 0;seq < nb_sequence;seq++) {
+      const double *current = real_sequence[seq][variable];
+
+      // pairs of values within the same sequence
+
+      for (int first = 0;first < length[seq];first++) {
+        for (int second = first + 1;second < length[seq];second++) {
+          difference_sum += fabs(current[first] - current[second]);
+        }
+      }
+
+      // pairs of values between this sequence and each following sequence
+
+      for (int other = seq + 1;other < nb_sequence;other++) {
+        for (int first = 0;first < length[seq];first++) {
+          for (int second = 0;second < length[other];second++) {
+            difference_sum += fabs(current[first] - real_sequence[other][variable][second]);
+          }
+        }
+      }
+    }
+  }
+
+  // each unordered pair has been counted once, hence the factor 2
+
+  return 2 * difference_sum / (cumul_length * (double)(cumul_length - 1));
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the coefficient of skewness for a variable.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] mean     mean,
+ *  \param[in] variance variance.
+ *
+ *  \return             coefficient of skewness.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::skewness_computation(int variable , double mean , double variance) const
+
+{
+  int i , j;
+  double skewness , diff;
+  long double cube_sum;
+
+
+  // delegated to the marginal frequency distribution when available
+
+  if (marginal_distribution[variable]) {
+    skewness = marginal_distribution[variable]->skewness_computation();
+  }
+
+  else {
+    if ((cumul_length > 2) && (variance > 0.)) {
+      cube_sum = 0.;
+
+      if ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY)) {
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 0;j < length[i];j++) {
+            diff = int_sequence[i][variable][j] - mean;
+            cube_sum += diff * diff * diff;
+          }
+        }
+      }
+
+      else {
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 0;j < length[i];j++) {
+            diff = real_sequence[i][variable][j] - mean;
+            cube_sum += diff * diff * diff;
+          }
+        }
+      }
+
+      skewness = cube_sum * cumul_length / ((cumul_length - 1) *
+                  (double)(cumul_length - 2) * pow(variance , 1.5));
+    }
+
+    // too few values or null variance: the coefficient is set to 0
+
+    else {
+      skewness = 0.;
+    }
+  }
+
+  return skewness;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the excess kurtosis for a variable:
+ *         excess kurtosis = coefficient of kurtosis - 3.
+ *
+ *  -2 is returned when the variance is not strictly positive or when
+ *  there are fewer than two values (the normalization divides by
+ *  cumul_length - 1).
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] mean     mean,
+ *  \param[in] variance variance.
+ *
+ *  \return             excess kurtosis.
+ */
+/*--------------------------------------------------------------*/
+
+double Sequences::kurtosis_computation(int variable , double mean , double variance) const
+
+{
+  int i , j;
+  double kurtosis , diff;
+  long double power_sum;
+
+
+  // delegated to the marginal frequency distribution when available
+
+  if (marginal_distribution[variable]) {
+    kurtosis = marginal_distribution[variable]->kurtosis_computation();
+  }
+
+  else {
+
+    // cumul_length > 1 guards the division by (cumul_length - 1) below
+
+    if ((cumul_length > 1) && (variance > 0.)) {
+      power_sum = 0.;
+
+      if ((type[variable] != REAL_VALUE) && (type[variable] != AUXILIARY)) {
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 0;j < length[i];j++) {
+            diff = int_sequence[i][variable][j] - mean;
+            power_sum += diff * diff * diff * diff;
+          }
+        }
+      }
+
+      else {
+        for (i = 0;i < nb_sequence;i++) {
+          for (j = 0;j < length[i];j++) {
+            diff = real_sequence[i][variable][j] - mean;
+            power_sum += diff * diff * diff * diff;
+          }
+        }
+      }
+
+      kurtosis = power_sum / ((cumul_length - 1) * variance * variance) - 3.;
+    }
+
+    else {
+      kurtosis = -2.;
+    }
+  }
+
+  return kurtosis;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the mean direction for a circular variable.
+ *
+ *  Integer values are always converted from degrees, whatever the unit
+ *  argument; real values are converted according to unit. Variables of
+ *  any other type contribute nothing to the sums.
+ *
+ *  \param[in] variable variable index,
+ *  \param[in] unit     unit (DEGREE/RADIAN).
+ *
+ *  \return             array of 4 values allocated with new[] and not released here:
+ *                      [0] mean cosine, [1] mean sine, [2] mean resultant length,
+ *                      [3] mean direction expressed in unit (D_DEFAULT if the
+ *                      mean resultant length is null).
+ */
+/*--------------------------------------------------------------*/
+
+double* Sequences::mean_direction_computation(int variable , angle_unit unit) const
+
+{
+  int i , j;
+  double *mean_direction;
+
+
+  mean_direction = new double[4];
+
+  mean_direction[0] = 0.;
+  mean_direction[1] = 0.;
+
+  // accumulation of the cosines and sines of the angles
+
+  switch (type[variable]) {
+
+  case INT_VALUE : {
+    for (i = 0;i < nb_sequence;i++) {
+      for (j = 0;j < length[i];j++) {
+        mean_direction[0] += cos(int_sequence[i][variable][j] * M_PI / 180);
+        mean_direction[1] += sin(int_sequence[i][variable][j] * M_PI / 180);
+      }
+    }
+    break;
+  }
+
+  case REAL_VALUE : {
+    switch (unit) {
+
+    case DEGREE : {
+      for (i = 0;i < nb_sequence;i++) {
+        for (j = 0;j < length[i];j++) {
+          mean_direction[0] += cos(real_sequence[i][variable][j] * M_PI / 180);
+          mean_direction[1] += sin(real_sequence[i][variable][j] * M_PI / 180);
+        }
+      }
+      break;
+    }
+
+    case RADIAN : {
+      for (i = 0;i < nb_sequence;i++) {
+        for (j = 0;j < length[i];j++) {
+          mean_direction[0] += cos(real_sequence[i][variable][j]);
+          mean_direction[1] += sin(real_sequence[i][variable][j]);
+        }
+      }
+      break;
+    }
+    }
+    break;
+  }
+  }
+
+  mean_direction[0] /= cumul_length;
+  mean_direction[1] /= cumul_length;
+
+  // mean resultant length
+
+  mean_direction[2] = sqrt(mean_direction[0] * mean_direction[0] +
+                           mean_direction[1] * mean_direction[1]);
+
+  if (mean_direction[2] > 0.) {
+
+    // atan only covers half of the circle: pi is added when the mean cosine is negative
+
+    mean_direction[3] = atan(mean_direction[1] / mean_direction[0]);
+
+    if (mean_direction[0] < 0.) {
+      mean_direction[3] += M_PI;
+    }
+    if (unit == DEGREE) {
+      mean_direction[3] *= (180 / M_PI);
+    }
+  }
+
+  else {
+    mean_direction[3] = D_DEFAULT;
+  }
+
+  return mean_direction;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the root mean square error or the mean absolute error for a variable.
+ *
+ *  \param[in] error       reference on a StatError object,
+ *  \param[in] os          stream for displaying the result,
+ *  \param[in] variable    variable index,
+ *  \param[in] iidentifier sequence identifier,
+ *  \param[in] robust      flag computation of mean absolute error.
+ *
+ *  \return                error status (the computed error is only written on os).
+ */
+/*--------------------------------------------------------------*/
+
+bool Sequences::mean_error_computation(StatError &error , ostream *os , int variable ,
+                                       int iidentifier , bool robust) const
+
+{
+  bool status = true;
+  int index = I_DEFAULT;  // I_DEFAULT: all the sequences are taken into account
+
+
+  error.init();
+
+  // the selected variable should be followed by an auxiliary variable,
+  // hence the upper bound nb_variable - 1
+
+  if ((variable < 1) || (variable >= nb_variable)) {
+    status = false;
+    error.update(STAT_error[STATR_VARIABLE_INDEX]);
+  }
+
+  else {
+    variable--;
+
+    if ((type[variable] != INT_VALUE) && (type[variable] != REAL_VALUE)) {
+      status = false;
+      ostringstream correction_message;
+      correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[REAL_VALUE];
+      error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str());
+    }
+
+    if (type[variable + 1] != AUXILIARY) {
+      status = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": "
+                    << STAT_error[STATR_VARIABLE_TYPE];
+      error.correction_update((error_message.str()).c_str() , STAT_variable_word[AUXILIARY]);
+    }
+  }
+
+  // search of the sequence corresponding to the identifier
+
+  if (iidentifier != I_DEFAULT) {
+    int seq = 0;
+    while ((seq < nb_sequence) && (iidentifier != identifier[seq])) {
+      seq++;
+    }
+
+    if (seq == nb_sequence) {
+      status = false;
+      error.update(SEQ_error[SEQR_SEQUENCE_IDENTIFIER]);
+    }
+    else {
+      index = seq;
+    }
+  }
+
+  if (status) {
+    const bool integer_valued = (type[variable] == INT_VALUE);
+    const bool all_sequences = (index == I_DEFAULT);
+    const int first = (all_sequences ? 0 : index);
+    const int last = (all_sequences ? nb_sequence : index + 1);
+    const int sample_size = (all_sequences ? cumul_length : length[index]);
+    double mean_error , diff;
+
+    // mean absolute error
+
+    if (robust) {
+      mean_error = 0.;
+
+      for (int seq = first;seq < last;seq++) {
+        for (int rank = 0;rank < length[seq];rank++) {
+          if (integer_valued) {
+            mean_error += fabs(int_sequence[seq][variable][rank] - real_sequence[seq][variable + 1][rank]);
+          }
+          else {
+            mean_error += fabs(real_sequence[seq][variable][rank] - real_sequence[seq][variable + 1][rank]);
+          }
+        }
+      }
+
+      mean_error /= sample_size;
+    }
+
+    // root mean square error
+
+    else {
+      long double mean_squared_error = 0.;
+
+      for (int seq = first;seq < last;seq++) {
+        for (int rank = 0;rank < length[seq];rank++) {
+          if (integer_valued) {
+            diff = int_sequence[seq][variable][rank] - real_sequence[seq][variable + 1][rank];
+          }
+          else {
+            diff = real_sequence[seq][variable][rank] - real_sequence[seq][variable + 1][rank];
+          }
+          mean_squared_error += diff * diff;
+        }
+      }
+
+      mean_error = sqrtl(mean_squared_error / sample_size);
+    }
+
+    if (os) {
+      *os << "\n";
+
+      // the variable is only named when several candidate variables exist
+
+      if (nb_variable > (type[0] == STATE ? 3 : 2)) {
+        *os << STAT_label[STATL_VARIABLE] << " " << variable + 1 << "   ";
+      }
+
+      if (robust) {
+        *os << SEQ_label[SEQL_MEAN_ABSOLUTE_ERROR] << ": " << mean_error << endl;
+      }
+      else {
+        *os << SEQ_label[SEQL_ROOT_MEAN_SQUARE_ERROR] << ": " << mean_error << endl;
+      }
+    }
+  }
+
+  return status;
+}
+
+
+};  // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/smc_algorithms.cpp b/src/cpp/sequence_analysis/smc_algorithms.cpp
new file mode 100644
index 0000000..506329d
--- /dev/null
+++ b/src/cpp/sequence_analysis/smc_algorithms.cpp
@@ -0,0 +1,2629 @@
+/* -*-c++-*-
+ *  ----------------------------------------------------------------------------
+ *
+ *       StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ *       Copyright 1995-2019 CIRAD AGAP
+ *
+ *       File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ *       $Source$
+ *       $Id$
+ *
+ *       Forum for StructureAnalysis developers:
+ *
+ *  
---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "semi_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the stationary distribution for an equilibrium semi-Markov chain. 
+ */ +/*--------------------------------------------------------------*/ + +void SemiMarkovChain::initial_probability_computation() + +{ + int i , j , k; + double sum , *state , *state_out , **state_in; + DiscreteParametric *occupancy; + + + state = new double[nb_state]; + state_out = new double[nb_state]; + + state_in = new double*[STATIONARY_PROBABILITY_LENGTH]; + for (i = 0;i < STATIONARY_PROBABILITY_LENGTH;i++) { + state_in[i] = new double[nb_state]; + } + + i = 0; + + do { + if (i > 0) { + sum = 0.; + } + + for (j = 0;j < nb_state;j++) { + if (i > 0) { + sum += fabs(state_in[i - 1][j] - state_out[j]); + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + if (i == 0) { + state[j] = initial[j]; + } + else { + state[j] += state_in[i - 1][j] - state_out[j]; + } + + occupancy = state_process->sojourn_time[j]; + state_out[j] = 0.; + + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (k < i + 1) { + state_out[j] += occupancy->mass[k] * state_in[i - k][j]; + } + else { + state_out[j] += forward[j]->mass[k] * initial[j]; + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (i == 0) { + state_out[j] = initial[j]; + } + else { + state_out[j] = state_in[i - 1][j]; + } + break; + } + } + } + + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + state_in[i][j] += transition[k][j] * state_out[k]; + } + } + +# ifdef DEBUG +// if ((i > 0) && (i % 100 == 0)) { + cout << i << " "; + for (j = 0;j < nb_state;j++) { + cout << state[j] << " "; + } + cout << " | " << sum / nb_state << endl; +// } +# endif + + i++; + } + while (((i == 1) || (sum / nb_state > STATIONARY_PROBABILITY_THRESHOLD)) && + (i < STATIONARY_PROBABILITY_LENGTH)); + +# ifdef DEBUG + cout << "\n" << SEQ_label[SEQL_LENGTH] << ": " << i << endl; +# endif + + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : + initial[j] = 
state_in[i - 1][j] - state_out[j] + state[j]; +// initial[j] = state[j]; + break; + + // case Markovian state + + case MARKOVIAN : + initial[j] = state_in[i - 1][j]; +// initial[j] = state_out[j]; + break; + } + } + + // renormalization for taking account of the thresholds applied on + // the cumulative state occupancy distribution functions + + sum = 0.; + for (i = 0;i < nb_state;i++) { + sum += initial[i]; + } + for (i = 0;i < nb_state;i++) { + initial[i] /= sum; + } + + delete [] state; + delete [] state_out; + + for (i = 0;i < STATIONARY_PROBABILITY_LENGTH;i++) { + delete [] state_in[i]; + } + delete [] state_in; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood of a semi-Markov chain for sequences. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] index sequence index. + * + * \return log-likelihood. + */ +/*--------------------------------------------------------------*/ + +double SemiMarkov::likelihood_computation(const MarkovianSequences &seq , int index) const + +{ + int i , j , k , m; + int nb_value , occupancy , *pstate , **pioutput; + double likelihood = 0. 
, proba , residual , **proutput; + + + // checking of the compatibility of the model with the data + + if (nb_output_process + 1 == seq.nb_variable) { + if (state_process->nb_value < seq.marginal_distribution[0]->nb_value) { + likelihood = D_INF; + } + + for (i = 0;i < nb_output_process;i++) { + if (((categorical_process[i]) || (discrete_parametric_process[i])) && + (seq.marginal_distribution[i])) { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < seq.marginal_distribution[i + 1]->nb_value) { + likelihood = D_INF; + break; + } + } + } + } + + else { + likelihood = D_INF; + } + + if (likelihood != D_INF) { + if (nb_output_process > 0) { + pioutput = new int*[nb_output_process]; + proutput = new double*[nb_output_process]; + } + + for (i = 0;i < seq.nb_sequence;i++) { + if ((index == I_DEFAULT) || (index == i)) { + pstate = seq.int_sequence[i][0]; + + proba = initial[*pstate]; + if (proba > 0.) { + likelihood += log(proba); + } + else { + likelihood = D_INF; + break; + } + + if (nb_output_process > 0) { + for (j = 0;j < nb_output_process;j++) { + switch (seq.type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq.int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq.real_sequence[i][j + 1]; + break; + } + } + } + + j = 0; + do { + if (j > 0) { + pstate++; + + proba = transition[*(pstate - 1)][*pstate]; + if (proba > 0.) { + likelihood += log(proba); + } + else { + likelihood = D_INF; + break; + } + } + + if (transition[*pstate][*pstate] < 1.) 
{ + occupancy = 1; + + if (sojourn_type[*pstate] == SEMI_MARKOVIAN) { + while ((j + occupancy < seq.length[i]) && (*(pstate + 1) == *pstate)) { + occupancy++; + pstate++; + } + + proba = 0.; + if ((type == EQUILIBRIUM) && (j == occupancy)) { + if (occupancy < forward[*pstate]->nb_value) { + if (j + occupancy < seq.length[i]) { + proba = forward[*pstate]->mass[occupancy]; + } + else { + proba = (1. - forward[*pstate]->cumul[occupancy - 1]); + } + } + } + + else { + if (occupancy < state_process->sojourn_time[*pstate]->nb_value) { + if (j + occupancy < seq.length[i]) { + proba = state_process->sojourn_time[*pstate]->mass[occupancy]; + } + else { + proba = (1. - state_process->sojourn_time[*pstate]->cumul[occupancy - 1]); + } + } + } + + if (proba > 0.) { + likelihood += log(proba); + } + else { + likelihood = D_INF; + break; + } + } + } + + else { + occupancy = seq.length[i] - j; + } + + if (nb_output_process > 0) { + for (k = j;k < j + occupancy;k++) { + for (m = 0;m < nb_output_process;m++) { + if (categorical_process[m]) { + proba = categorical_process[m]->observation[*pstate]->mass[*pioutput[m]]; + } + + else if (discrete_parametric_process[m]) { + proba = discrete_parametric_process[m]->observation[*pstate]->mass[*pioutput[m]]; + } + + else { + if (((continuous_parametric_process[m]->ident == GAMMA) || + (continuous_parametric_process[m]->ident == ZERO_INFLATED_GAMMA)) && (seq.min_value[m + 1] < seq.min_interval[m + 1] / 2)) { + switch (seq.type[m + 1]) { + case INT_VALUE : + proba = continuous_parametric_process[m]->observation[*pstate]->mass_computation(*pioutput[m] , *pioutput[m] + seq.min_interval[m + 1]); + break; + case REAL_VALUE : + proba = continuous_parametric_process[m]->observation[*pstate]->mass_computation(*proutput[m] , *proutput[m] + seq.min_interval[m + 1]); + break; + } + } + + else if (continuous_parametric_process[m]->ident == LINEAR_MODEL) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - 
(continuous_parametric_process[m]->observation[*pstate]->intercept + + continuous_parametric_process[m]->observation[*pstate]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? k : seq.index_parameter[i][k])); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[*pstate]->intercept + + continuous_parametric_process[m]->observation[*pstate]->slope * + (seq.index_param_type == IMPLICIT_TYPE ? k : seq.index_parameter[i][k])); + break; + } + + proba = continuous_parametric_process[m]->observation[*pstate]->mass_computation(residual - seq.min_interval[m + 1] / 2 , residual + seq.min_interval[m + 1] / 2); + } + + else if (continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (k == 0) { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - continuous_parametric_process[m]->observation[*pstate]->location; + break; + case REAL_VALUE : + residual = *proutput[m] - continuous_parametric_process[m]->observation[*pstate]->location; + break; + } + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + residual = *pioutput[m] - (continuous_parametric_process[m]->observation[*pstate]->location + + continuous_parametric_process[m]->observation[*pstate]->autoregressive_coeff * + (*(pioutput[m] - 1) - continuous_parametric_process[m]->observation[*pstate]->location)); + break; + case REAL_VALUE : + residual = *proutput[m] - (continuous_parametric_process[m]->observation[*pstate]->location + + continuous_parametric_process[m]->observation[*pstate]->autoregressive_coeff * + (*(proutput[m] - 1) - continuous_parametric_process[m]->observation[*pstate]->location)); + break; + } + } + + proba = continuous_parametric_process[m]->observation[*pstate]->mass_computation(residual - seq.min_interval[m + 1] / 2 , residual + seq.min_interval[m + 1] / 2); + } + + else { + switch (seq.type[m + 1]) { + case INT_VALUE : + proba = 
continuous_parametric_process[m]->observation[*pstate]->mass_computation(*pioutput[m] - seq.min_interval[m + 1] / 2 , *pioutput[m] + seq.min_interval[m + 1] / 2); + break; + case REAL_VALUE : + proba = continuous_parametric_process[m]->observation[*pstate]->mass_computation(*proutput[m] - seq.min_interval[m + 1] / 2 , *proutput[m] + seq.min_interval[m + 1] / 2); + break; + } + } + } + + switch (seq.type[m + 1]) { + case INT_VALUE : + pioutput[m]++; + break; + case REAL_VALUE : + proutput[m]++; + break; + } + + if (proba > 0.) { + likelihood += log(proba); + } + else { + likelihood = D_INF; + break; + } + } + + if (likelihood == D_INF) { + break; + } + } + + if (likelihood == D_INF) { + break; + } + } + + j += occupancy; + } + while (j < seq.length[i]); + + if (likelihood == D_INF) { + break; + } + } + } + + if (nb_output_process > 0) { + delete [] pioutput; + delete [] proutput; + } + } + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood of a semi-Markov chain for sequences. + * + * \param[in] seq reference on a SemiMarkovData object. + * + * \return log-likelihood. 
+ */ +/*--------------------------------------------------------------*/ + +double SemiMarkov::likelihood_computation(const SemiMarkovData &seq) const + +{ + int i , j; + int nb_value; + double buff , likelihood = 0.; + FrequencyDistribution **initial_run , **final_run , **single_run; + + + // checking of the compatibility of the model with the data + + if (nb_output_process + 1 == seq.nb_variable) { + if ((!(seq.marginal_distribution[0])) || (nb_state < seq.marginal_distribution[0]->nb_value)) { + likelihood = D_INF; + } + + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < seq.marginal_distribution[i + 1]->nb_value) { + likelihood = D_INF; + break; + } + } + + else if ((continuous_parametric_process[i]->ident == LINEAR_MODEL) || + (continuous_parametric_process[i]->ident == AUTOREGRESSIVE_MODEL) || (!(seq.marginal_distribution[i + 1]))) { + likelihood = D_INF; + break; + } + } + } + + else { + likelihood = D_INF; + } + + if (likelihood != D_INF) { + likelihood = Chain::likelihood_computation(*(seq.chain_data)); + + if (likelihood != D_INF) { + if (type == EQUILIBRIUM) { + + // construction of the censored sojourn time frequency distributions + + initial_run = new FrequencyDistribution*[seq.marginal_distribution[0]->nb_value]; + for (i = 0;i < seq.marginal_distribution[0]->nb_value;i++) { + initial_run[i] = new FrequencyDistribution(seq.max_length); + } + + final_run = new FrequencyDistribution*[seq.marginal_distribution[0]->nb_value]; + for (i = 0;i < seq.marginal_distribution[0]->nb_value;i++) { + final_run[i] = new FrequencyDistribution(seq.max_length); + } + + single_run = new FrequencyDistribution*[seq.marginal_distribution[0]->nb_value]; + for (i = 0;i < seq.marginal_distribution[0]->nb_value;i++) { + single_run[i] = new 
FrequencyDistribution(seq.max_length + 1); + } + + // update of the censored sojourn time frequency distributions + + seq.censored_sojourn_time_frequency_distribution_computation(initial_run , final_run , single_run); + } + + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + buff = state_process->sojourn_time[i]->likelihood_computation(*(seq.characteristics[0]->sojourn_time[i])); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + + switch (type) { + + case ORDINARY : { + buff = state_process->sojourn_time[i]->survivor_likelihood_computation(*(seq.characteristics[0]->final_run[i])); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + } + break; + } + + case EQUILIBRIUM : { + buff = state_process->sojourn_time[i]->survivor_likelihood_computation(*(final_run[i])); + + if (buff != D_INF) { + likelihood += buff; + buff = forward[i]->likelihood_computation(*(initial_run[i])); + + if (buff != D_INF) { + likelihood += buff; + buff = forward[i]->survivor_likelihood_computation(*(single_run[i])); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + } + } + + else { + likelihood = D_INF; + } + } + + else { + likelihood = D_INF; + } + break; + } + } + + if (likelihood == D_INF) { + break; + } + } + } + + if (type == EQUILIBRIUM) { + for (i = 0;i < seq.marginal_distribution[0]->nb_value;i++) { + delete initial_run[i]; + } + delete [] initial_run; + + for (i = 0;i < seq.marginal_distribution[0]->nb_value;i++) { + delete final_run[i]; + } + delete [] final_run; + + for (i = 0;i < seq.marginal_distribution[0]->nb_value;i++) { + delete single_run[i]; + } + delete [] single_run; + } + } + + if (likelihood != D_INF) { + for (i = 0;i < nb_output_process;i++) { + if (categorical_process[i]) { + for (j = 0;j < nb_state;j++) { + buff = categorical_process[i]->observation[j]->likelihood_computation(*(seq.observation_distribution[i + 1][j])); + + if (buff 
!= D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + } + } + + else if (discrete_parametric_process[i]) { + for (j = 0;j < nb_state;j++) { + buff = discrete_parametric_process[i]->observation[j]->likelihood_computation(*(seq.observation_distribution[i + 1][j])); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + } + } + + else { + for (j = 0;j < nb_state;j++) { + buff = continuous_parametric_process[i]->observation[j]->likelihood_computation(*(seq.observation_distribution[i + 1][j]) , + (int)seq.min_interval[i]); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + } + } + + if (likelihood == D_INF) { + break; + } + } + } + } + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Counting of initial states and transitions. + * + * \param[in] chain_data reference on a ChainData object, + * \param[in] smarkov flags on the self-transition probabilities. 
+ */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::transition_count_computation(const ChainData &chain_data , + const SemiMarkov *smarkov) const + +{ + int i , j; + int *pstate; + + + for (i = 0;i < chain_data.nb_state;i++) { + chain_data.initial[i] = 0; + } + + for (i = 0;i < chain_data.nb_row;i++) { + for (j = 0;j < chain_data.nb_state;j++) { + chain_data.transition[i][j] = 0; + } + } + + // extraction of initial states and transitions + + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + (chain_data.initial[*pstate])++; + + for (j = 1;j < length[i];j++) { + pstate++; + (chain_data.transition[*(pstate - 1)][*pstate])++; + } + } + + if (smarkov) { + for (i = 0;i < chain_data.nb_state;i++) { + if (smarkov->sojourn_type[i] == SEMI_MARKOVIAN) { + chain_data.transition[i][i] = 0; + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the initial state and transition counts. + * + * \param[in] smarkov flags on the self-transition probabilities. + */ +/*--------------------------------------------------------------*/ + +void SemiMarkovData::build_transition_count(const SemiMarkov *smarkov) + +{ + chain_data = new ChainData(ORDINARY , marginal_distribution[0]->nb_value , + marginal_distribution[0]->nb_value); + transition_count_computation(*chain_data , smarkov); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a semi-Markov chain. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] estimator estimator type for the reestimation of the state occupancy distribution + * (complete or partial likelihood), + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] nb_iter number of iterations, + * \param[in] mean_estimator method for the computation of the state occupancy + * distribution mean (equilibrium semi-Markov chain). + * + * \return SemiMarkov object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkov* MarkovianSequences::semi_markov_estimation(StatError &error , ostream *os , process_type itype , + censoring_estimator estimator , bool counting_flag , int nb_iter , + duration_distribution_mean_estimator mean_estimator) const + +{ + bool status = true; + int i , j; + int nb_likelihood_decrease , *occupancy_survivor , *censored_occupancy_survivor , nb_value[1]; + double likelihood , previous_likelihood , hlikelihood , occupancy_mean; + DiscreteParametric *occupancy; + Forward *forward; + Reestimation *occupancy_reestim , *length_bias_reestim; + SemiMarkov *smarkov; + SemiMarkovData *seq; + FrequencyDistribution *complete_run , *censored_run , *pfinal_run , *hreestim , + **initial_run , **final_run , **single_run; + const FrequencyDistribution *prun[3]; + + + smarkov = NULL; + error.init(); + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if ((marginal_distribution[0]->nb_value < 2) || + (marginal_distribution[0]->nb_value > NB_OUTPUT)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + + 
else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (nb_variable > 1) { + if (nb_variable > 2) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , "1 or 2"); + } + + if ((type[1] != INT_VALUE) && (type[1] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (test_hidden(1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << SEQ_error[SEQR_OVERLAP]; + error.update((error_message.str()).c_str()); + } + + if (marginal_distribution[1]->nb_value > NB_STATE) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT]); + } + +/* if (!characteristics[1]) { + for (i = 0;i < marginal_distribution[1]->nb_value;i++) { + if (marginal_distribution[1]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } */ + } + } + + if (status) { + if (max_length > COUNTING_MAX_LENGTH) { + counting_flag = false; + } + + if (itype == EQUILIBRIUM) { + + // construction of the censored sojourn time frequency distributions + + initial_run = new FrequencyDistribution*[marginal_distribution[0]->nb_value]; + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + initial_run[i] = new 
FrequencyDistribution(max_length); + } + + final_run = new FrequencyDistribution*[marginal_distribution[0]->nb_value]; + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + final_run[i] = new FrequencyDistribution(max_length); + } + + single_run = new FrequencyDistribution*[marginal_distribution[0]->nb_value]; + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + single_run[i] = new FrequencyDistribution(max_length + 1); + } + + // update of the censored sojourn time frequency distributions + + censored_sojourn_time_frequency_distribution_computation(initial_run , final_run , single_run); + } + + if (nb_variable == 2) { + nb_value[0] = marginal_distribution[1]->nb_value; + } + + smarkov = new SemiMarkov(itype , marginal_distribution[0]->nb_value , + nb_variable - 1 , nb_value); + smarkov->semi_markov_data = new SemiMarkovData(*this , SEQUENCE_COPY , + (itype == EQUILIBRIUM ? true : false)); + + seq = smarkov->semi_markov_data; + seq->state_variable_init(); + seq->build_transition_count(); + + for (i = 0;i < smarkov->nb_state;i++) { + if ((seq->characteristics[0]->sojourn_time[i]->nb_element > 0) || + ((itype == EQUILIBRIUM) && (initial_run[i]->nb_element > 0))) { + seq->chain_data->transition[i][i] = 0; + } + } + + // estimation of the Markov chain parameters + + seq->chain_data->estimation(*smarkov); + smarkov->component_computation(); + + if ((itype == EQUILIBRIUM) && (smarkov->nb_component > 1)) { + delete smarkov; + smarkov = NULL; + error.correction_update(STAT_parsing[STATP_CHAIN_STRUCTURE] , STAT_parsing[STATP_IRREDUCIBLE]); + } + + else { + + // estimation of the state occupancy distributions + + if (estimator != PARTIAL_LIKELIHOOD) { + occupancy_survivor = new int[max_length]; + censored_occupancy_survivor = new int[max_length + 1]; + occupancy_reestim = new Reestimation(max_length + 1); + } + +# ifdef DEBUG + assert(smarkov->sojourn_type == NULL); +# endif + + smarkov->sojourn_type = new state_sojourn_type[smarkov->nb_state]; + 
smarkov->state_process->absorption = new double[smarkov->nb_state]; + smarkov->state_process->sojourn_time = new DiscreteParametric*[smarkov->nb_state]; + smarkov->forward = new Forward*[smarkov->nb_state]; + for (i = 0;i < smarkov->nb_state;i++) { + smarkov->forward[i] = NULL; + smarkov->state_process->sojourn_time[i] = NULL; + } + + for (i = 0;i < smarkov->nb_state;i++) { + if ((seq->characteristics[0]->sojourn_time[i]->nb_element == 0) && + ((itype == ORDINARY) || (initial_run[i]->nb_element == 0))) { + smarkov->sojourn_type[i] = MARKOVIAN; + smarkov->state_process->absorption[i] = 1.; + } + + else { + smarkov->state_process->absorption[i] = 0.; + + if ((itype == EQUILIBRIUM) && (seq->characteristics[0]->sojourn_time[i]->nb_element == 0)) { + occupancy = NULL; + } + + else if ((estimator != PARTIAL_LIKELIHOOD) && (itype == EQUILIBRIUM) && + ((initial_run[i]->nb_element > 0) || (single_run[i]->nb_element > 0))) { + + // initialization of the state occupancy distribution + + prun[0] = seq->characteristics[0]->sojourn_time[i]; + prun[1] = seq->characteristics[0]->sojourn_time[i]; + complete_run = new FrequencyDistribution(2 , prun); + +// prun[0] = seq->initial_run[0][i]; +// prun[1] = final_run[i]; + +// prun[0] = initial_run[i]; +// prun[1] = seq->characteristics[0]->final_run[i]; +// censored_run = new FrequencyDistribution(2 , prun); + + prun[0] = initial_run[i]; + prun[1] = final_run[i]; + prun[2] = single_run[i]; + censored_run = new FrequencyDistribution(3 , prun); + + complete_run->state_occupancy_estimation(censored_run , occupancy_reestim , + occupancy_survivor , + censored_occupancy_survivor); + delete complete_run; + delete censored_run; + + if ((estimator == KAPLAN_MEIER) && (single_run[i]->nb_element == 0)) { + occupancy = occupancy_reestim->type_parametric_estimation(1 , true , OCCUPANCY_THRESHOLD); + } + + else { + occupancy = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 , I_DEFAULT , 1. , + (occupancy_reestim->mean > 1. ? 1. 
/ occupancy_reestim->mean : 0.99) , + OCCUPANCY_THRESHOLD); + occupancy->init(CATEGORICAL , I_DEFAULT , I_DEFAULT , D_DEFAULT , D_DEFAULT); + forward = new Forward(*occupancy , occupancy->alloc_nb_value); + + delete occupancy_reestim; + occupancy_reestim = new Reestimation(MAX(occupancy->alloc_nb_value , max_length + 1)); + length_bias_reestim = new Reestimation(MAX(occupancy->alloc_nb_value , max_length + 1)); + + likelihood = D_INF; + j = 0; + + do { + j++; + + // computation of the reestimation quantities of the state occupancy distribution + + occupancy->expectation_step(*(seq->characteristics[0]->sojourn_time[i]) , + *(initial_run[i]) , *(final_run[i]) , + *(single_run[i]) , occupancy_reestim , + length_bias_reestim , j); + + switch (mean_estimator) { + case COMPUTED : + occupancy_mean = interval_bisection(occupancy_reestim , length_bias_reestim); + break; + case ONE_STEP_LATE : + occupancy_mean = occupancy->mean; + break; + } + + occupancy_reestim->equilibrium_process_estimation(length_bias_reestim , occupancy , + occupancy_mean); + forward->computation(*occupancy); + + previous_likelihood = likelihood; + likelihood = occupancy->state_occupancy_likelihood_computation(*forward , *(seq->characteristics[0]->sojourn_time[i]) , + *(initial_run[i]) , *(final_run[i]) , + *(single_run[i])); + + if ((os) && ((j < 10) || ((j < 100) && (j % 10 == 0)) || ((j < 1000) && (j % 100 == 0)) || (j % 1000 == 0))) { + *os << STAT_label[STATL_ITERATION] << " " << j << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << endl; + } + } + while ((likelihood != D_INF) && (((nb_iter == I_DEFAULT) && (j < OCCUPANCY_NB_ITER) && + ((likelihood - previous_likelihood) / -likelihood > OCCUPANCY_LIKELIHOOD_DIFF)) || + ((nb_iter != I_DEFAULT) && (j < nb_iter)))); + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << STAT_label[STATL_STATE] << " " << i << " - " + << j << " " << 
STAT_label[STATL_ITERATIONS] << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << "\n" << endl; + } + + hreestim = new FrequencyDistribution(MAX(occupancy->alloc_nb_value , max_length + 1)); + + likelihood = D_INF; + nb_likelihood_decrease = 0; + + j = 0; + do { + j++; + + // computation of the reestimation quantities of the state occupancy distribution + + occupancy->expectation_step(*(seq->characteristics[0]->sojourn_time[i]) , + *(initial_run[i]) , *(final_run[i]) , + *(single_run[i]) , occupancy_reestim , + length_bias_reestim , j , true , mean_estimator); + + hreestim->update(occupancy_reestim , (int)(occupancy_reestim->nb_element * + MAX(sqrt(occupancy_reestim->variance) , 1.) * OCCUPANCY_COEFF)); + hlikelihood = hreestim->Reestimation::type_parametric_estimation(occupancy , 1 , true , + OCCUPANCY_THRESHOLD); + + if (hlikelihood == D_INF) { + likelihood = D_INF; + } + + else { + occupancy->computation(hreestim->nb_value , OCCUPANCY_THRESHOLD); + forward->copy(*occupancy); + forward->computation(*occupancy); + + previous_likelihood = likelihood; + likelihood = occupancy->state_occupancy_likelihood_computation(*forward , *(seq->characteristics[0]->sojourn_time[i]) , + *(initial_run[i]) , *(final_run[i]) , + *(single_run[i])); + + if (likelihood < previous_likelihood) { + nb_likelihood_decrease++; + } + else { + nb_likelihood_decrease = 0; + } + + if ((os) && ((j < 10) || ((j < 100) && (j % 10 == 0)) || ((j < 1000) && (j % 100 == 0)) || (j % 1000 == 0))) { + *os << STAT_label[STATL_ITERATION] << " " << j << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << endl; + } + } + } + while ((likelihood != D_INF) && (j < OCCUPANCY_NB_ITER) && + (((likelihood - previous_likelihood) / -likelihood > OCCUPANCY_LIKELIHOOD_DIFF) || + (hlikelihood == 
D_INF) || (nb_likelihood_decrease == 1))); + + delete hreestim; + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << STAT_label[STATL_STATE] << " " << i << " - " + << j << " " << STAT_label[STATL_ITERATIONS] << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << "\n" << endl; + } + } + + else { + delete occupancy; + occupancy = NULL; + } + } + + else { + delete occupancy; + occupancy = NULL; + } + + delete forward; + delete length_bias_reestim; + } + } + + else if ((estimator != PARTIAL_LIKELIHOOD) && (((itype == ORDINARY) && (seq->characteristics[0]->final_run[i]->nb_element > 0) && + (seq->characteristics[0]->final_run[i]->nb_value > seq->characteristics[0]->sojourn_time[i]->nb_value)) || ((itype == EQUILIBRIUM) && + (final_run[i]->nb_element > 0) && (final_run[i]->nb_value > seq->characteristics[0]->sojourn_time[i]->nb_value)))) { + switch (itype) { + case ORDINARY : + pfinal_run = seq->characteristics[0]->final_run[i]; + break; + case EQUILIBRIUM : + pfinal_run = final_run[i]; + break; + } + + // initialization of the state occupancy distribution + + seq->characteristics[0]->sojourn_time[i]->state_occupancy_estimation(pfinal_run , occupancy_reestim , + occupancy_survivor , + censored_occupancy_survivor); + occupancy = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 , I_DEFAULT , 1. , + 1. 
/ occupancy_reestim->mean , OCCUPANCY_THRESHOLD); + occupancy->init(CATEGORICAL , I_DEFAULT , I_DEFAULT , D_DEFAULT , D_DEFAULT); + + delete occupancy_reestim; + occupancy_reestim = new Reestimation(MAX(occupancy->alloc_nb_value , max_length + 1)); + + likelihood = D_INF; + j = 0; + + do { + j++; + + // computation of the reestimation quantities of the state occupancy distribution + + occupancy->expectation_step(*(seq->characteristics[0]->sojourn_time[i]) , + *pfinal_run , occupancy_reestim , j); + occupancy_reestim->distribution_estimation(occupancy); + + previous_likelihood = likelihood; + likelihood = occupancy->state_occupancy_likelihood_computation(*(seq->characteristics[0]->sojourn_time[i]) , + *pfinal_run); + + if ((os) && ((j < 10) || ((j < 100) && (j % 10 == 0)) || ((j < 1000) && (j % 100 == 0)) || (j % 1000 == 0))) { + *os << STAT_label[STATL_ITERATION] << " " << j << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << endl; + } + } + while ((likelihood != D_INF) && (((nb_iter == I_DEFAULT) && (j < OCCUPANCY_NB_ITER) && + ((likelihood - previous_likelihood) / -likelihood > OCCUPANCY_LIKELIHOOD_DIFF)) || + ((nb_iter != I_DEFAULT) && (j < nb_iter)))); + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << STAT_label[STATL_STATE] << " " << i << " - " + << j << " " << STAT_label[STATL_ITERATIONS] << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << "\n" << endl; + } + + hreestim = new FrequencyDistribution(MAX(occupancy->alloc_nb_value , max_length + 1)); + + likelihood = D_INF; + nb_likelihood_decrease = 0; + + j = 0; + do { + j++; + + // computation of the reestimation quantities of the state occupancy distribution + + occupancy->expectation_step(*(seq->characteristics[0]->sojourn_time[i]) , + *pfinal_run , occupancy_reestim , j); + 
+ hreestim->update(occupancy_reestim , (int)(occupancy_reestim->nb_element * + MAX(sqrt(occupancy_reestim->variance) , 1.) * OCCUPANCY_COEFF)); + hlikelihood = hreestim->Reestimation::type_parametric_estimation(occupancy , 1 , true , + OCCUPANCY_THRESHOLD); + + if (hlikelihood == D_INF) { + likelihood = D_INF; + } + + else { + occupancy->computation(hreestim->nb_value , OCCUPANCY_THRESHOLD); + + previous_likelihood = likelihood; + likelihood = occupancy->state_occupancy_likelihood_computation(*(seq->characteristics[0]->sojourn_time[i]) , + *pfinal_run); + + if (likelihood < previous_likelihood) { + nb_likelihood_decrease++; + } + else { + nb_likelihood_decrease = 0; + } + + if ((os) && ((j < 10) || ((j < 100) && (j % 10 == 0)) || ((j < 1000) && (j % 100 == 0)) || (j % 1000 == 0))) { + *os << STAT_label[STATL_ITERATION] << " " << j << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << endl; + } + } + } + while ((likelihood != D_INF) && (j < OCCUPANCY_NB_ITER) && + (((likelihood - previous_likelihood) / -likelihood > OCCUPANCY_LIKELIHOOD_DIFF) || + (hlikelihood == D_INF) || (nb_likelihood_decrease == 1))); + + delete hreestim; + + if (likelihood != D_INF) { + if (os) { + *os << "\n" << STAT_label[STATL_STATE] << " " << i << " - " + << j << " " << STAT_label[STATL_ITERATIONS] << " " + << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood << " " + << STAT_label[STATL_SMOOTHNESS] << ": " << occupancy->second_difference_norm_computation() << "\n" << endl; + } + } + + else { + delete occupancy; + occupancy = NULL; + } + } + + else { + delete occupancy; + occupancy = NULL; + } + } + + else if ((estimator != PARTIAL_LIKELIHOOD) && (((itype == ORDINARY) && (seq->characteristics[0]->final_run[i]->nb_element > 0)) || + ((itype == EQUILIBRIUM) && (final_run[i]->nb_element > 0)))) { + seq->characteristics[0]->sojourn_time[i]->state_occupancy_estimation((itype == 
ORDINARY ? seq->characteristics[0]->final_run[i] : final_run[i]) , + occupancy_reestim , occupancy_survivor , + censored_occupancy_survivor); + + occupancy = occupancy_reestim->type_parametric_estimation(1 , true , OCCUPANCY_THRESHOLD); + +/* occupancy = new DiscreteParametric(occupancy_reestim->nb_value); + occupancy_reestim->distribution_estimation(occupancy); */ + } + + else { + occupancy = seq->characteristics[0]->sojourn_time[i]->Reestimation::type_parametric_estimation(1 , true , + OCCUPANCY_THRESHOLD); + } + + if (occupancy) { + if (occupancy->mean == 1.) { + smarkov->sojourn_type[i] = MARKOVIAN; + } + + else { + smarkov->sojourn_type[i] = SEMI_MARKOVIAN; + smarkov->state_process->sojourn_time[i] = new DiscreteParametric(*occupancy); + if (smarkov->stype[i] == RECURRENT) { + smarkov->forward[i] = new Forward(*(smarkov->state_process->sojourn_time[i])); + } + } + + delete occupancy; + } + + else { + delete smarkov; + smarkov = NULL; + + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << i << " " + << SEQ_label[SEQL_OCCUPANCY_DISTRIBUTION] << " " + << STAT_error[STATR_ESTIMATION_FAILURE]; + error.update((error_message.str()).c_str()); + break; + } + } + } + + if (estimator != PARTIAL_LIKELIHOOD) { + delete [] occupancy_survivor; + delete [] censored_occupancy_survivor; + delete occupancy_reestim; + } + } + + if (itype == EQUILIBRIUM) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + delete initial_run[i]; + } + delete [] initial_run; + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + delete final_run[i]; + } + delete [] final_run; + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + delete single_run[i]; + } + delete [] single_run; + } + + if (smarkov) { + if (itype == EQUILIBRIUM) { + for (i = 0;i < smarkov->nb_state;i++) { + smarkov->initial[i] = 1. 
/ (double)smarkov->nb_state; + } + smarkov->initial_probability_computation(); + } + + // estimation of categorical observation distributions + + if (smarkov->nb_output_process == 1) { + seq->build_observation_frequency_distribution(smarkov->nb_state); + + for (i = 0;i < smarkov->nb_state;i++) { + seq->observation_distribution[1][i]->distribution_estimation(smarkov->categorical_process[0]->observation[i]); + } + } + + // computation of the log-likelihood and the characteristic distributions of the estimated semi-Markov chain + + seq->likelihood = smarkov->likelihood_computation(*seq); + +# ifdef DEBUG + cout << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << seq->likelihood << " | " + << smarkov->likelihood_computation(*seq , I_DEFAULT) << endl; +# endif + + if (seq->likelihood == D_INF) { + delete smarkov; + smarkov = NULL; + error.update(STAT_error[STATR_ESTIMATION_FAILURE]); + } + + else { + smarkov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false); + } + } + } + + return(smarkov); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Comparison of semi-Markov chains for a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the results of model comparison, + * \param[in] nb_model number of semi-Markov chains, + * \param[in] ismarkov pointer on SemiMarkov objects, + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::comparison(StatError &error , ostream *os , int nb_model , + const SemiMarkov **ismarkov , const string path) const + +{ + bool status = true; + int i , j; + double **likelihood; + + + error.init(); + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + + if (nb_variable > 1) { + if (nb_variable > 2) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , "1 or 2"); + } + + if ((type[1] != INT_VALUE) && (type[1] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (test_hidden(1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << SEQ_error[SEQR_OVERLAP]; + error.update((error_message.str()).c_str()); + } + + if (!characteristics[1]) { + for (i = 0;i < marginal_distribution[1]->nb_value;i++) { + if (marginal_distribution[1]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << 
STAT_error[STATR_MISSING_VALUE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + } + } + + for (i = 0;i < nb_model;i++) { + if (ismarkov[i]->nb_output_process + 1 != nb_variable) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT_PROCESS]; + error.update((error_message.str()).c_str()); + } + + else { + if (ismarkov[i]->state_process->nb_value < marginal_distribution[0]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " + << SEQ_error[SEQR_NB_STATE]; + error.update((error_message.str()).c_str()); + } + + if (nb_variable == 2) { + if (ismarkov[i]->categorical_process[0]->nb_value < marginal_distribution[1]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (status) { + likelihood = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + likelihood[i] = new double[nb_model]; + } + + // for each sequence, computation of the log-likelihood for each model + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_model;j++) { + likelihood[i][j] = ismarkov[j]->likelihood_computation(*this , i); + } + } + + if (os) { + likelihood_write(*os , nb_model , likelihood , SEQ_label[SEQL_SEMI_MARKOV_CHAIN] , true); + } + if (!path.empty()) { + status = likelihood_write(error , path , nb_model , likelihood , SEQ_label[SEQL_SEMI_MARKOV_CHAIN]); + } + + for (i = 0;i < nb_sequence;i++) { + delete [] likelihood[i]; + } + delete [] likelihood; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a semi-Markov chain. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] length_distribution sequence length frequency distribution, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] divergence_flag flag on the computation of a Kullback-Leibler divergence. + * + * \return SemiMarkovData object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* SemiMarkov::simulation(StatError &error , const FrequencyDistribution &length_distribution , + bool counting_flag , bool divergence_flag) const + +{ + bool status = true , hidden; + int i , j , k , m; + int cumul_length , occupancy , *decimal_scale , *pstate , **pioutput; + variable_nature *itype; + double buff , min_location , likelihood , **proutput; + Distribution *weight , *restoration_weight; + SemiMarkov *smarkov; + SemiMarkovData *seq; + + + seq = NULL; + error.init(); + + if ((length_distribution.nb_element < 1) || (length_distribution.nb_element > NB_SEQUENCE)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if (length_distribution.offset < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length_distribution.nb_value - 1 > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + if (status) { + cumul_length = 0; + for (i = length_distribution.offset;i < length_distribution.nb_value;i++) { + cumul_length += i * length_distribution.frequency[i]; + } + + if (cumul_length > CUMUL_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]); + } + } + + if (status) { + if (length_distribution.nb_value - 1 > COUNTING_MAX_LENGTH) { + counting_flag = false; + } + hidden = CategoricalSequenceProcess::test_hidden(nb_output_process , categorical_process); + + // initializations + + itype = new variable_nature[nb_output_process + 1]; + + itype[0] = STATE; + for (i = 0;i < nb_output_process;i++) { + if 
(!continuous_parametric_process[i]) { + itype[i + 1] = INT_VALUE; + } + else { + itype[i + 1] = REAL_VALUE; + } + } + + seq = new SemiMarkovData(length_distribution , nb_output_process + 1 , itype); + delete [] itype; + + seq->semi_markov = new SemiMarkov(*this , false); + + smarkov = seq->semi_markov; + smarkov->create_cumul(); + smarkov->cumul_computation(); + + if (smarkov->nb_output_process > 0) { + pioutput = new int*[smarkov->nb_output_process]; + proutput = new double*[smarkov->nb_output_process]; + + decimal_scale = new int[smarkov->nb_output_process]; + + for (i = 0;i < smarkov->nb_output_process;i++) { + if (smarkov->continuous_parametric_process[i]) { + switch (smarkov->continuous_parametric_process[i]->ident) { + + case GAMMA : { + min_location = smarkov->continuous_parametric_process[i]->observation[0]->location * smarkov->continuous_parametric_process[i]->observation[0]->dispersion; + for (j = 1;j < smarkov->nb_state;j++) { + buff = smarkov->continuous_parametric_process[i]->observation[j]->location * smarkov->continuous_parametric_process[i]->observation[0]->dispersion; + if (buff < min_location) { + min_location = buff; + } + } + + buff = (int)ceil(log(min_location) / log(10)); + if (buff < GAMMA_MAX_NB_DECIMAL) { + decimal_scale[i] = pow(10 , (GAMMA_MAX_NB_DECIMAL - buff)); + } + else { + decimal_scale[i] = 1; + } + +# ifdef MESSAGE + cout << "\nScale: " << i + 1 << " " << decimal_scale[i] << endl; +# endif + + break; + } + + case GAUSSIAN : { + min_location = fabs(smarkov->continuous_parametric_process[i]->observation[0]->location); + for (j = 1;j < smarkov->nb_state;j++) { + buff = fabs(smarkov->continuous_parametric_process[i]->observation[j]->location); + if (buff < min_location) { + min_location = buff; + } + } + + buff = (int)ceil(log(min_location) / log(10)); + if (buff < GAUSSIAN_MAX_NB_DECIMAL) { + decimal_scale[i] = pow(10 , (GAUSSIAN_MAX_NB_DECIMAL - buff)); + } + else { + decimal_scale[i] = 1; + } + +# ifdef MESSAGE + cout << "\nScale: 
" << i + 1 << " " << decimal_scale[i] << endl; +# endif + + break; + } + + case VON_MISES : { + switch (smarkov->continuous_parametric_process[i]->unit) { + case DEGREE : + decimal_scale[i] = DEGREE_DECIMAL_SCALE; + break; + case RADIAN : + decimal_scale[i] = RADIAN_DECIMAL_SCALE; + break; + } + + for (j = 0;j < smarkov->nb_state;j++) { + smarkov->continuous_parametric_process[i]->observation[j]->von_mises_cumul_computation(); + } + break; + } + } + } + } + } + + for (i = 0;i < seq->nb_sequence;i++) { + pstate = seq->int_sequence[i][0]; + *pstate = cumul_method(smarkov->nb_state , smarkov->cumul_initial); + + for (j = 0;j < smarkov->nb_output_process;j++) { + switch (seq->type[j + 1]) { + case INT_VALUE : + pioutput[j] = seq->int_sequence[i][j + 1]; + break; + case REAL_VALUE : + proutput[j] = seq->real_sequence[i][j + 1]; + break; + } + } + + j = 0; + do { + if (j > 0) { + pstate++; + *pstate = cumul_method(smarkov->nb_state , smarkov->cumul_transition[*(pstate - 1)]); + } + + switch (smarkov->sojourn_type[*pstate]) { + + case SEMI_MARKOVIAN : { + if ((smarkov->type == EQUILIBRIUM) && (j == 0)) { + occupancy = smarkov->forward[*pstate]->simulation(); + } + else { + occupancy = smarkov->state_process->sojourn_time[*pstate]->simulation(); + } + + if (j + occupancy > seq->length[i]) { + occupancy = seq->length[i] - j; + } + break; + } + + case MARKOVIAN : { + if (smarkov->transition[*pstate][*pstate] < 1.) 
{ + occupancy = 1; + } + else { + occupancy = seq->length[i] - j; + } + break; + } + } + + for (k = 1;k < occupancy;k++) { + pstate++; + *pstate = *(pstate - 1); + } + + for (k = j;k < j + occupancy;k++) { + for (m = 0;m < smarkov->nb_output_process;m++) { + if (smarkov->categorical_process[m]) { + *pioutput[m] = smarkov->categorical_process[m]->observation[*pstate]->simulation(); + } + + else if (smarkov->discrete_parametric_process[m]) { + *pioutput[m] = smarkov->discrete_parametric_process[m]->observation[*pstate]->simulation(); + } + + else { + if (smarkov->continuous_parametric_process[m]->ident == LINEAR_MODEL) { + *proutput[m] = smarkov->continuous_parametric_process[m]->observation[*pstate]->intercept + + smarkov->continuous_parametric_process[m]->observation[*pstate]->slope * k + +// round(smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation() * decimal_scale[m]) / decimal_scale[m]; + smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation(); + } + + else if (smarkov->continuous_parametric_process[m]->ident == AUTOREGRESSIVE_MODEL) { + if (k == 0) { + *proutput[m] = smarkov->continuous_parametric_process[m]->observation[*pstate]->location + +// round(smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation() * decimal_scale[m]) / decimal_scale[m]; + smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation(); + } + else { + *proutput[m] = smarkov->continuous_parametric_process[m]->observation[*pstate]->location + + smarkov->continuous_parametric_process[m]->observation[*pstate]->autoregressive_coeff * + (*(proutput[m] - 1) - smarkov->continuous_parametric_process[m]->observation[*pstate]->location) + +// round(smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation() * decimal_scale[m]) / decimal_scale[m]; + smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation(); + } + } + + else { + *proutput[m] = 
round(smarkov->continuous_parametric_process[m]->observation[*pstate]->simulation() * decimal_scale[m]) / decimal_scale[m]; + } + } + + switch (seq->type[m + 1]) { + case INT_VALUE : + pioutput[m]++; + break; + case REAL_VALUE : + proutput[m]++; + break; + } + } + } + + j += occupancy; + } + while (j < seq->length[i]); + } + + smarkov->remove_cumul(); + + if (smarkov->nb_output_process > 0) { + delete [] pioutput; + delete [] proutput; + + delete [] decimal_scale; + + for (i = 0;i < smarkov->nb_output_process;i++) { + if ((smarkov->continuous_parametric_process[i]) && + (smarkov->continuous_parametric_process[i]->ident == VON_MISES)) { + for (j = 0;j < smarkov->nb_state;j++) { + delete [] smarkov->continuous_parametric_process[i]->observation[j]->cumul; + smarkov->continuous_parametric_process[i]->observation[j]->cumul = NULL; + } + } + } + } + + // extraction of the characteristics of the generated sequences + + seq->min_value[0] = 0; + seq->max_value[0] = nb_state - 1; + seq->build_marginal_frequency_distribution(0); + + for (i = 1;i < seq->nb_variable;i++) { + seq->min_value_computation(i); + seq->max_value_computation(i); + + seq->build_marginal_frequency_distribution(i); + seq->min_interval_computation(i); + } + + seq->build_transition_count(smarkov); + seq->build_observation_frequency_distribution(nb_state); + seq->build_observation_histogram(nb_state); + seq->build_characteristic(I_DEFAULT , true , (type == EQUILIBRIUM ? 
true : false)); + +/* if ((seq->max_value[0] < nb_state - 1) || (!(seq->characteristics[0]))) { + delete seq; + seq = NULL; + error.update(SEQ_error[SEQR_STATES_NOT_REPRESENTED]); + } + + else if (!divergence_flag) { */ + if (!divergence_flag) { + smarkov->characteristic_computation(*seq , counting_flag); + + // computation of the log-likelihood of the model for the generated sequences + + likelihood = smarkov->likelihood_computation(*seq); + + if (likelihood == D_INF) { + likelihood = smarkov->likelihood_computation(*seq , I_DEFAULT); + } + +# ifdef DEBUG + else { + cout << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood + << " | " << smarkov->likelihood_computation(*seq , I_DEFAULT) << endl; + } +# endif + + if (hidden) { + seq->restoration_likelihood = likelihood; + } + else { + seq->likelihood = likelihood; + } + + // computation of the mixtures of observation distributions (theoretical weights and weights deduced from the restoration) + + if (hidden) { + weight = NULL; + restoration_weight = NULL; + + for (i = 0;i < smarkov->nb_output_process;i++) { + if ((smarkov->categorical_process[i]) || (smarkov->discrete_parametric_process[i]) || + ((smarkov->continuous_parametric_process[i]) && + (smarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL))) { + weight = smarkov->state_process->weight_computation(); + restoration_weight = seq->weight_computation(); + break; + } + } + + for (i = 0;i < smarkov->nb_output_process;i++) { + if (smarkov->categorical_process[i]) { + delete smarkov->categorical_process[i]->weight; + delete smarkov->categorical_process[i]->mixture; + smarkov->categorical_process[i]->weight = new Distribution(*weight); + smarkov->categorical_process[i]->mixture = smarkov->categorical_process[i]->mixture_computation(smarkov->categorical_process[i]->weight); + delete smarkov->categorical_process[i]->restoration_weight; + delete smarkov->categorical_process[i]->restoration_mixture; + 
smarkov->categorical_process[i]->restoration_weight = new Distribution(*restoration_weight); + smarkov->categorical_process[i]->restoration_mixture = smarkov->categorical_process[i]->mixture_computation(smarkov->categorical_process[i]->restoration_weight); + } + + else if (smarkov->discrete_parametric_process[i]) { + delete smarkov->discrete_parametric_process[i]->weight; + delete smarkov->discrete_parametric_process[i]->mixture; + smarkov->discrete_parametric_process[i]->weight = new Distribution(*weight); + smarkov->discrete_parametric_process[i]->mixture = smarkov->discrete_parametric_process[i]->mixture_computation(smarkov->discrete_parametric_process[i]->weight); + + delete smarkov->discrete_parametric_process[i]->restoration_weight; + delete smarkov->discrete_parametric_process[i]->restoration_mixture; + smarkov->discrete_parametric_process[i]->restoration_weight = new Distribution(*restoration_weight); + smarkov->discrete_parametric_process[i]->restoration_mixture = smarkov->discrete_parametric_process[i]->mixture_computation(smarkov->discrete_parametric_process[i]->restoration_weight); + } + + else if ((smarkov->continuous_parametric_process[i]) && + (smarkov->continuous_parametric_process[i]->ident != LINEAR_MODEL)) { + delete smarkov->continuous_parametric_process[i]->weight; + smarkov->continuous_parametric_process[i]->weight = new Distribution(*weight); + + delete smarkov->continuous_parametric_process[i]->restoration_weight; + smarkov->continuous_parametric_process[i]->restoration_weight = new Distribution(*restoration_weight); + } + } + + delete weight; + delete restoration_weight; + } + } + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a semi-Markov chain. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of sequences, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return SemiMarkovData object. + */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* SemiMarkov::simulation(StatError &error , int nb_sequence , + int length , bool counting_flag) const + +{ + bool status = true; + SemiMarkovData *seq; + + + seq = NULL; + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + if (status) { + FrequencyDistribution length_distribution(length + 1); + + length_distribution.nb_element = nb_sequence; + length_distribution.offset = length; + length_distribution.max = nb_sequence; + length_distribution.mean = length; + length_distribution.variance = 0.; + length_distribution.frequency[length] = nb_sequence; + + seq = simulation(error , length_distribution , counting_flag); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a semi-Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of sequences, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return SemiMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +SemiMarkovData* SemiMarkov::simulation(StatError &error , int nb_sequence , + const MarkovianSequences &iseq , bool counting_flag) const + +{ + FrequencyDistribution *length_distribution; + SemiMarkovData *seq; + + + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + seq = NULL; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + length_distribution = iseq.length_distribution->frequency_scale(nb_sequence); + + seq = simulation(error , *length_distribution , counting_flag); + delete length_distribution; + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between semi-Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of semi-Markov chains, + * \param[in] ismarkov pointer on SemiMarkov objects, + * \param[in] length_distribution sequence length frequency distribution, + * \param[in] path file path. + * + * \return DistanceMatrix object. 
+ */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* SemiMarkov::divergence_computation(StatError &error , ostream *os , + int nb_model , const SemiMarkov **ismarkov , + FrequencyDistribution **length_distribution , + const string path) const + +{ + bool status = true , lstatus; + int i , j , k; + int cumul_length , nb_failure; + double **likelihood; + long double divergence; + const SemiMarkov **smarkov; + MarkovianSequences *iseq , *seq; + SemiMarkovData *simul_seq; + DistanceMatrix *dist_matrix; + ofstream *out_file; + + + dist_matrix = NULL; + error.init(); + + for (i = 0;i < nb_model - 1;i++) { + if (ismarkov[i]->type != type) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_MODEL_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (ismarkov[i]->nb_output_process == nb_output_process) { + if (ismarkov[i]->nb_state != nb_state) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_NB_STATE]; + error.update((error_message.str()).c_str()); + } + + if (nb_output_process == 1) { + if (ismarkov[i]->categorical_process[0]->nb_value != categorical_process[0]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + + else if ((nb_output_process == 0) && (ismarkov[i]->nb_output_process == 1)) { + if (ismarkov[i]->categorical_process[0]->nb_value != nb_state) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + + else { // if ((nb_output_process == 1) && (ismarkov[i]->nb_output_process == 0)) + 
if (ismarkov[i]->nb_state != categorical_process[0]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_NB_STATE]; + error.update((error_message.str()).c_str()); + } + } + } + + for (i = 0;i < nb_model;i++) { + lstatus = true; + + if ((length_distribution[i]->nb_element < 1) || (length_distribution[i]->nb_element > NB_SEQUENCE)) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_NB_SEQUENCE]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->offset < 2) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->nb_value - 1 > MAX_LENGTH) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + + if (!lstatus) { + status = false; + } + + else { + cumul_length = 0; + for (j = length_distribution[i]->offset;j < length_distribution[i]->nb_value;j++) { + cumul_length += j * length_distribution[i]->frequency[j]; + } + + if (cumul_length > CUMUL_LENGTH) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + out_file = NULL; + + if (!path.empty()) { + out_file = new ofstream(path.c_str()); + + if 
(!out_file) { + error.update(STAT_error[STATR_FILE_NAME]); + if (os) { + *os << error; + } + } + } + + smarkov = new const SemiMarkov*[nb_model]; + + smarkov[0] = this; + for (i = 1;i < nb_model;i++) { + smarkov[i] = ismarkov[i - 1]; + } + + dist_matrix = new DistanceMatrix(nb_model , SEQ_label[SEQL_SEMI_MARKOV_CHAIN]); + + for (i = 0;i < nb_model;i++) { + + // generation of a sample of sequences using a semi-Markov chain + + simul_seq = smarkov[i]->simulation(error , *length_distribution[i] , false , true); + + likelihood = new double*[simul_seq->nb_sequence]; + for (j = 0;j < simul_seq->nb_sequence;j++) { + likelihood[j] = new double[nb_model]; + } + + for (j = 0;j < simul_seq->nb_sequence;j++) { + likelihood[j][i] = smarkov[i]->likelihood_computation(*simul_seq , j); + + if ((os) && (likelihood[j][i] == D_INF)) { + *os << "\nERROR - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << endl; + } + } + + if (smarkov[i]->nb_output_process == 1) { + iseq = simul_seq->remove_variable_1(); + } + else { + iseq = simul_seq; + } + + // computation of the log-likelihood of each semi-Markov chain for the sample of sequences + + for (j = 0;j < nb_model;j++) { + if (j != i) { + if (smarkov[j]->nb_output_process == 1) { + seq = iseq->transcode(error , smarkov[j]->categorical_process[0]); + } + else { + seq = iseq; + } + + divergence = 0.; + cumul_length = 0; + nb_failure = 0; + + for (k = 0;k < seq->nb_sequence;k++) { + likelihood[k][j] = smarkov[j]->likelihood_computation(*seq , k); + +// if (divergence != -D_INF) { + if (likelihood[k][j] != D_INF) { + divergence += likelihood[k][i] - likelihood[k][j]; + cumul_length += seq->length[k]; + } + else { + nb_failure++; +// divergence = -D_INF; + } +// } + } + + if ((os) && (nb_failure > 0)) { + *os << "\nWARNING - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << ", " + << SEQ_error[SEQR_TARGET_MODEL] << ": " << j + 1 << " - " + << SEQ_error[SEQR_DIVERGENCE_NB_FAILURE] << ": " << nb_failure << endl; + } + +// if 
(divergence != -D_INF) { + dist_matrix->update(i + 1 , j + 1 , divergence , cumul_length); +// } + + if (smarkov[j]->nb_output_process == 1) { + delete seq; + } + } + } + + if (os) { + *os << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " << simul_seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[simul_seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + simul_seq->likelihood_write(*os , nb_model , likelihood , SEQ_label[SEQL_SEMI_MARKOV_CHAIN]); + } + if (out_file) { + *out_file << SEQ_label[SEQL_SEMI_MARKOV_CHAIN] << " " << i + 1 << ": " << simul_seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[simul_seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + simul_seq->likelihood_write(*out_file , nb_model , likelihood , SEQ_label[SEQL_SEMI_MARKOV_CHAIN]); + } + + for (j = 0;j < simul_seq->nb_sequence;j++) { + delete [] likelihood[j]; + } + delete [] likelihood; + + if (smarkov[i]->nb_output_process == 1) { + delete iseq; + } + delete simul_seq; + } + + if (out_file) { + out_file->close(); + delete out_file; + } + + delete smarkov; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between semi-Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of semi-Markov chains, + * \param[in] smarkov pointer on SemiMarkov objects, + * \param[in] nb_sequence number of generated sequences, + * \param[in] length sequence length, + * \param[in] path file path. + * + * \return DistanceMatrix object. 
+ */
+/*--------------------------------------------------------------*/
+
+DistanceMatrix* SemiMarkov::divergence_computation(StatError &error , ostream *os ,
+                                                   int nb_model , const SemiMarkov **smarkov ,
+                                                   int nb_sequence , int length , const string path) const
+
+{
+  bool valid;
+  int m;
+  FrequencyDistribution *reference , **length_dist;
+  DistanceMatrix *matrix;
+
+
+  error.init();
+
+  // the three constraints are tested independently so that every violation is recorded
+
+  valid = true;
+
+  if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) {
+    valid = false;
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+  }
+  if (length < 2) {
+    valid = false;
+    error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]);
+  }
+  if (length > MAX_LENGTH) {
+    valid = false;
+    error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]);
+  }
+
+  if (!valid) {
+    return NULL;
+  }
+
+  // degenerate length distribution: nb_sequence sequences, all of length "length"
+
+  length_dist = new FrequencyDistribution*[nb_model];
+
+  reference = new FrequencyDistribution(length + 1);
+  reference->nb_element = nb_sequence;
+  reference->offset = length;
+  reference->max = nb_sequence;
+  reference->mean = length;
+  reference->variance = 0.;
+  reference->frequency[length] = nb_sequence;
+
+  // each model gets its own copy since every entry is deleted separately below
+
+  length_dist[0] = reference;
+  for (m = 1;m < nb_model;m++) {
+    length_dist[m] = new FrequencyDistribution(*reference);
+  }
+
+  matrix = divergence_computation(error , os , nb_model , smarkov , length_dist , path);
+
+  for (m = 0;m < nb_model;m++) {
+    delete length_dist[m];
+  }
+  delete [] length_dist;
+
+  return matrix;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of Kullback-Leibler divergences between semi-Markov chains.
+ *
+ * \param[in] error       reference on a StatError object,
+ * \param[in] os          stream for displaying the matrix of pairwise distances between models,
+ * \param[in] nb_model    number of semi-Markov chains,
+ * \param[in] smarkov     pointer on SemiMarkov objects,
+ * \param[in] nb_sequence number of generated sequences (between 1 and NB_SEQUENCE),
+ * \param[in] seq         pointer on MarkovianSequences objects (one per model, supplying
+ *                        the sequence length distributions),
+ * \param[in] path        file path.
+ *
+ * \return DistanceMatrix object (NULL when nb_sequence is out of range).
+ */
+/*--------------------------------------------------------------*/
+
+DistanceMatrix* SemiMarkov::divergence_computation(StatError &error , ostream *os ,
+                                                   int nb_model , const SemiMarkov **smarkov ,
+                                                   int nb_sequence , const MarkovianSequences **seq ,
+                                                   const string path) const
+
+{
+  int m;
+  FrequencyDistribution **scaled_length;
+  DistanceMatrix *matrix;
+
+
+  error.init();
+
+  if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) {
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+    return NULL;
+  }
+
+  // one length distribution per model, obtained from the observed sequences through
+  // frequency_scale(nb_sequence)
+
+  scaled_length = new FrequencyDistribution*[nb_model];
+  for (m = 0;m < nb_model;m++) {
+    scaled_length[m] = seq[m]->length_distribution->frequency_scale(nb_sequence);
+  }
+
+  matrix = divergence_computation(error , os , nb_model , smarkov , scaled_length , path);
+
+  for (m = 0;m < nb_model;m++) {
+    delete scaled_length[m];
+  }
+  delete [] scaled_length;
+
+  return matrix;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the SemiMarkovIterator class.
+ *
+ * \param[in] ismarkov pointer on a SemiMarkov object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovIterator::SemiMarkovIterator(SemiMarkov *ismarkov)
+
+{
+  // no state has been simulated yet
+
+  state = I_DEFAULT;
+  occupancy = 0;
+  counter = I_DEFAULT;
+
+  // registration of the iterator on the model
+
+  semi_markov = ismarkov;
+  ++(semi_markov->nb_iterator);
+
+  // the cumulative distribution functions are built when at least one of them is missing
+
+  if (!((semi_markov->cumul_initial) && (semi_markov->cumul_transition))) {
+    semi_markov->create_cumul();
+    semi_markov->cumul_computation();
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Copy of a SemiMarkovIterator object.
+ *
+ * \param[in] iter reference on a SemiMarkovIterator object.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovIterator::copy(const SemiMarkovIterator &iter)
+
+{
+  state = iter.state;
+  occupancy = iter.occupancy;
+  counter = iter.counter;
+
+  // the copy counts as one more iterator on the shared model
+
+  semi_markov = iter.semi_markov;
+  ++(semi_markov->nb_iterator);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destructor of the SemiMarkovIterator class.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovIterator::~SemiMarkovIterator()
+
+{
+  --(semi_markov->nb_iterator);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Assignment operator of the SemiMarkovIterator class.
+ *
+ * \param[in] iter reference on a SemiMarkovIterator object.
+ *
+ * \return SemiMarkovIterator object.
+ */
+/*--------------------------------------------------------------*/
+
+SemiMarkovIterator& SemiMarkovIterator::operator=(const SemiMarkovIterator &iter)
+
+{
+  if (&iter == this) {
+    return *this;
+  }
+
+  // release of the current model before taking over the model of iter
+
+  --(semi_markov->nb_iterator);
+  copy(iter);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Simulation using a semi-Markov chain.
+ *
+ * \param[in] int_seq        sequence,
+ * \param[in] length         sequence length,
+ * \param[in] initialization flag initialization.
+ *
+ * \return error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool SemiMarkovIterator::simulation(int **int_seq , int length , bool initialization)
+
+{
+  bool status;
+
+
+  // continuing a simulation requires a current state: an iterator that was never
+  // initialized can only be used with the initialization flag
+
+  if ((state == I_DEFAULT) && (!initialization)) {
+    status = false;
+  }
+
+  else {
+    int i , j;
+    int *pstate , **pioutput;
+//    double **proutput;
+
+
+    status = true;
+
+    // write cursors on the output rows (allocated only when there is at least one output process)
+
+    if (semi_markov->nb_output_process > 0) {
+      pioutput = new int*[semi_markov->nb_output_process];
+//      proutput = new double*[semi_markov->nb_output_process];
+    }
+
+    if (initialization) {
+
+      // simulation of the initial state and of its occupancy
+
+      state = cumul_method(semi_markov->nb_state , semi_markov->cumul_initial);
+
+      switch (semi_markov->sojourn_type[state]) {
+
+      case SEMI_MARKOVIAN : {
+        switch (semi_markov->type) {
+        case ORDINARY :
+          occupancy = semi_markov->state_process->sojourn_time[state]->simulation();
+          break;
+        case EQUILIBRIUM :
+          occupancy = semi_markov->forward[state]->simulation();
+          break;
+        }
+        break;
+      }
+
+      case MARKOVIAN : {
+
+        // occupancy is left unchanged when the self-transition probability is 1:
+        // the test on the transition below is then false before occupancy is read
+
+        if (semi_markov->transition[state][state] < 1.) {
+          occupancy = 1;
+        }
+        break;
+      }
+      }
+
+      counter = 0;
+    }
+
+    // row 0 of int_seq receives the states, row i + 1 the values of output process i
+
+    pstate = int_seq[0];
+    for (i = 0;i < semi_markov->nb_output_process;i++) {
+/*      switch (type[i + 1]) {
+      case INT_VALUE : */
+      pioutput[i] = int_seq[i + 1];
+/*        break;
+      case REAL_VALUE :
+        proutput[i] = real_seq[i + 1];
+        break;
+      } */
+    }
+
+    for (i = 0;i < length;i++) {
+
+      // counter: time already spent in the current state
+
+      counter++;
+      *pstate++ = state;
+
+      for (j = 0;j < semi_markov->nb_output_process;j++) {
+        if (semi_markov->categorical_process[j]) {
+          *pioutput[j]++ = semi_markov->categorical_process[j]->observation[state]->simulation();
+        }
+        else if (semi_markov->discrete_parametric_process[j]) {
+          *pioutput[j]++ = semi_markov->discrete_parametric_process[j]->observation[state]->simulation();
+        }
+        else {
+
+          // nothing is written for the other output processes (continuous case commented out)
+
+//          *proutput[j]++ = semi_markov->continuous_parametric_process[j]->observation[state]->simulation();
+        }
+      }
+
+      // end of the sojourn in the current state: simulation of the next state and of its occupancy
+
+      if ((semi_markov->transition[state][state] < 1.) && (counter == occupancy)) {
+        state = cumul_method(semi_markov->nb_state , semi_markov->cumul_transition[state]);
+
+        switch (semi_markov->sojourn_type[state]) {
+        case SEMI_MARKOVIAN :
+          occupancy = semi_markov->state_process->sojourn_time[state]->simulation();
+          break;
+        case MARKOVIAN :
+          occupancy = 1;
+          break;
+        }
+
+        counter = 0;
+      }
+    }
+
+    if (semi_markov->nb_output_process > 0) {
+      delete [] pioutput;
+//      delete [] proutput;
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Simulation using a semi-Markov chain.
+ *
+ * \param[in] length         sequence length,
+ * \param[in] initialization flag initialization.
+ *
+ * \return generated sequence.
+ */
+/*--------------------------------------------------------------*/
+
+int** SemiMarkovIterator::simulation(int length , bool initialization)
+
+{
+  int i;
+  int **int_seq;
+
+
+  // same precondition as the overload filling a caller-supplied buffer
+
+  if ((state == I_DEFAULT) && (!initialization)) {
+    int_seq = NULL;
+  }
+
+  else {
+
+    // one row for the states plus one row per output process, each of "length" values;
+    // the rows are not freed here (presumably the caller releases them - to be confirmed)
+
+    int_seq = new int*[semi_markov->nb_output_process + 1];
+    for (i = 0;i <= semi_markov->nb_output_process;i++) {
+      int_seq[i] = new int[length];
+    }
+
+    // the status returned by this call is not tested (the precondition was checked above)
+
+    simulation(int_seq , length , initialization);
+  }
+
+  return int_seq;
+}
+
+
+}; // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/smc_distributions1.cpp b/src/cpp/sequence_analysis/smc_distributions1.cpp
new file mode 100644
index 0000000..826ef95
--- /dev/null
+++ b/src/cpp/sequence_analysis/smc_distributions1.cpp
@@ -0,0 +1,2035 @@
+/* -*-c++-*-
+ * ----------------------------------------------------------------------------
+ *
+ * StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ * Copyright 1995-2018 CIRAD AGAP
+ *
+ * File author(s): Yann Guedon (yann.guedon@cirad.fr)
+ *
+ * $Source$
+ * $Id$
+ *
+ * Forum for StructureAnalysis developers:
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * GNU General Public Licence
+ *
+ * This program is
free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include "stat_tool/stat_label.h" + +#include "semi_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state probabilities as a function of + * the index parameter for a semi-Markov chain. 
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::index_state_distribution()
+
+{
+  int i , j , k;
+  double sum , *state_out , **state_in;
+  Curves *index_state;
+  DiscreteParametric *occupancy;
+
+
+  // the curves are filled in place: state_process->index_value must already be allocated
+
+  index_state = state_process->index_value;
+
+  // state_out[j]: probability of leaving (semi-Markov) / of being in (Markov) state j at index i
+  // state_in[i][j]: state_out propagated through the transition probabilities, read at later indices
+
+  state_out = new double[nb_state];
+
+  state_in = new double*[index_state->length - 1];
+  for (i = 0;i < index_state->length - 1;i++) {
+    state_in[i] = new double[nb_state];
+  }
+
+  for (i = 0;i < index_state->length;i++) {
+    for (j = 0;j < nb_state;j++) {
+      switch (sojourn_type[j]) {
+
+      // case semi-Markovian state
+
+      case SEMI_MARKOVIAN : {
+        if (i == 0) {
+          index_state->point[j][i] = initial[j];
+        }
+        else {
+
+          // state_out[j] still holds the value computed at index i - 1
+
+          index_state->point[j][i] = state_in[i - 1][j] - state_out[j] + index_state->point[j][i - 1];
+        }
+
+        // nothing is propagated beyond the last index
+
+        if (i < index_state->length - 1) {
+          occupancy = state_process->sojourn_time[j];
+          state_out[j] = 0.;
+//          istate = 0.;
+
+          for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) {
+            if (k < i + 1) {
+              state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+//              istate += (1. - occupancy->cumul[k - 1]) * state_in[i - k][j];
+            }
+            else {
+
+              // k == i + 1: the state has been occupied since index 0
+
+              switch (type) {
+              case ORDINARY :
+                state_out[j] += occupancy->mass[k] * initial[j];
+//                istate += (1. - occupancy->cumul[k - 1]) * initial[j];
+                break;
+              case EQUILIBRIUM :
+                state_out[j] += forward[j]->mass[k] * initial[j];
+//                istate += (1. - forward[j]->cumul[k - 1]) * initial[j];
+                break;
+              }
+            }
+          }
+
+//          index_state->point[j][i] = istate;
+        }
+        break;
+      }
+
+      // case Markovian state
+
+      case MARKOVIAN : {
+        if (i == 0) {
+          index_state->point[j][i] = initial[j];
+        }
+        else {
+          index_state->point[j][i] = state_in[i - 1][j];
+        }
+
+        if (i < index_state->length - 1) {
+          state_out[j] = index_state->point[j][i];
+        }
+        break;
+      }
+      }
+    }
+
+    if (i < index_state->length - 1) {
+      for (j = 0;j < nb_state;j++) {
+        state_in[i][j] = 0.;
+        for (k = 0;k < nb_state;k++) {
+          state_in[i][j] += transition[k][j] * state_out[k];
+        }
+      }
+    }
+
+    // renormalization for taking account of the thresholds applied on
+    // the cumulative state occupancy distribution functions
+
+    sum = 0.;
+    for (j = 0;j < nb_state;j++) {
+      sum += index_state->point[j][i];
+    }
+
+    // NOTE(review): a null sum would give a division by zero here - confirm it cannot occur
+
+    if (sum < 1.) {
+      for (j = 0;j < nb_state;j++) {
+        index_state->point[j][i] /= sum;
+      }
+    }
+  }
+
+  delete [] state_out;
+
+  for (i = 0;i < index_state->length - 1;i++) {
+    delete [] state_in[i];
+  }
+  delete [] state_in;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the probabilities of the memories for a semi-Markov chain
+ *        taking account of the sequence length distribution.
+ *
+ * \return memory probabilities.
+ */
+/*--------------------------------------------------------------*/
+
+double* SemiMarkovChain::memory_computation() const
+
+{
+  int i , j , k;
+  double sum , *memory , *state_out , **state_in;
+  DiscreteParametric *occupancy;
+
+
+  // memory is the returned array (one value per state); it is not freed here
+
+  memory = new double[nb_state];
+  state_out = new double[nb_state];
+
+  switch (type) {
+
+  case ORDINARY : {
+
+    // NOTE(review): the allocation size assumes state_process->length->nb_value >= 3 - confirm
+
+    state_in = new double*[state_process->length->nb_value - 3];
+    for (i = 0;i < state_process->length->nb_value - 3;i++) {
+      state_in[i] = new double[nb_state];
+    }
+
+    for (i = 0;i < nb_state;i++) {
+      memory[i] = 0.;
+    }
+
+    for (i = 0;i < state_process->length->nb_value - 2;i++) {
+      for (j = 0;j < nb_state;j++) {
+        switch (sojourn_type[j]) {
+
+        // case semi-Markovian state
+
+        case SEMI_MARKOVIAN : {
+          occupancy = state_process->sojourn_time[j];
+          state_out[j] = 0.;
+
+          for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) {
+            if (k < i + 1) {
+              state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+            }
+            else {
+              state_out[j] += occupancy->mass[k] * initial[j];
+            }
+          }
+          break;
+        }
+
+        // case Markovian state
+
+        case MARKOVIAN : {
+          if (i == 0) {
+            state_out[j] = initial[j];
+          }
+          else {
+            state_out[j] = state_in[i - 1][j];
+          }
+          break;
+        }
+        }
+
+        // summation of the probabilities of the memories
+
+        memory[j] += state_out[j] * (1. - state_process->length->cumul[i + 1]);
+      }
+
+      // no row is needed for the last index since no later iteration reads it
+
+      if (i < state_process->length->nb_value - 3) {
+        for (j = 0;j < nb_state;j++) {
+          state_in[i][j] = 0.;
+          for (k = 0;k < nb_state;k++) {
+            state_in[i][j] += transition[k][j] * state_out[k];
+          }
+        }
+      }
+    }
+
+    for (i = 0;i < state_process->length->nb_value - 3;i++) {
+      delete [] state_in[i];
+    }
+    delete [] state_in;
+
+    break;
+  }
+
+  case EQUILIBRIUM : {
+    state_in = new double*[STATIONARY_PROBABILITY_LENGTH];
+    for (i = 0;i < STATIONARY_PROBABILITY_LENGTH;i++) {
+      state_in[i] = new double[nb_state];
+    }
+
+    i = 0;
+
+    // iteration until the mean absolute difference between state_in[i - 1] and the previous
+    // state_out falls to STATIONARY_PROBABILITY_THRESHOLD or STATIONARY_PROBABILITY_LENGTH is reached
+
+    do {
+
+      // sum is not assigned during the first pass (i == 0)
+
+      if (i > 0) {
+        sum = 0.;
+      }
+
+      for (j = 0;j < nb_state;j++) {
+        if (i > 0) {
+          sum += fabs(state_in[i - 1][j] - state_out[j]);
+        }
+
+        switch (sojourn_type[j]) {
+
+        // case semi-Markovian state
+
+        case SEMI_MARKOVIAN : {
+          occupancy = state_process->sojourn_time[j];
+          state_out[j] = 0.;
+
+          for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) {
+            if (k < i + 1) {
+              state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+            }
+            else {
+              state_out[j] += forward[j]->mass[k] * initial[j];
+            }
+          }
+          break;
+        }
+
+        // case Markovian state
+
+        case MARKOVIAN : {
+          if (i == 0) {
+            state_out[j] = initial[j];
+          }
+          else {
+            state_out[j] = state_in[i - 1][j];
+          }
+          break;
+        }
+        }
+      }
+
+      for (j = 0;j < nb_state;j++) {
+        state_in[i][j] = 0.;
+        for (k = 0;k < nb_state;k++) {
+          state_in[i][j] += transition[k][j] * state_out[k];
+        }
+      }
+
+# ifdef DEBUG
+//      if ((i > 0) && (i % 100 == 0)) {
+        cout << i << "  ";
+        for (j = 0;j < nb_state;j++) {
+          cout << state_out[j] << " ";
+        }
+        cout << " | " << sum / nb_state << endl;
+//      }
+# endif
+
+      i++;
+    }
+
+    // the (i == 1) test forces a second pass before sum, unset during the first one, is read
+
+    while (((i == 1) || (sum / nb_state > STATIONARY_PROBABILITY_THRESHOLD)) &&
+           (i < STATIONARY_PROBABILITY_LENGTH));
+
+# ifdef DEBUG
+    cout << "\n" << SEQ_label[SEQL_LENGTH] << ": " << i << endl;
+# endif
+
+    for (j = 0;j < nb_state;j++) {
+      memory[j] = state_in[i - 1][j];
+    }
+
+    for (i = 0;i < STATIONARY_PROBABILITY_LENGTH;i++) {
+      delete [] state_in[i];
+    }
+    delete [] state_in;
+
+    break;
+  }
+  }
+
+  delete [] state_out;
+
+  return memory;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the probability of not visiting a state
+ *        for an ordinary semi-Markov chain.
+ *
+ * Nothing is computed (state_process->no_occurrence[state] is left unchanged) when
+ * the selected state is accessible from every other state.
+ *
+ * \param[in] state     state,
+ * \param[in] increment threshold on the sum of the probabilities of leaving a state.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::state_no_occurrence_probability(int state , double increment)
+
+{
+  int i;
+
+  // search for a state from which the selected state cannot be reached
+
+  for (i = 0;i < nb_state;i++) {
+    if ((i != state) && (!accessibility[i][state])) {
+      break;
+    }
+  }
+
+  if (i < nb_state) {
+    int j , k;
+    int min_time;
+    double sum , *state_out , **state_in ,
+           &no_occurrence = state_process->no_occurrence[state];
+    DiscreteParametric *occupancy;
+
+
+    state_out = new double[nb_state];
+
+    // row 0 is never used: the time index starts at 1
+
+    state_in = new double*[LEAVE_LENGTH];
+    state_in[0] = NULL;
+    for (i = 1;i < LEAVE_LENGTH;i++) {
+      state_in[i] = new double[nb_state];
+    }
+
+    // contribution of the initial states from which the selected state cannot be reached
+
+    no_occurrence = 0.;
+    for (i = 0;i < nb_state;i++) {
+      if ((i != state) && (!accessibility[i][state])) {
+        no_occurrence += initial[i];
+      }
+    }
+
+    // minimum number of iterations: sum of the mean sojourn times in the other states
+    // (absorbing Markovian states are skipped)
+
+    sum = 0.;
+    for (i = 0;i < nb_state;i++) {
+      if (i != state) {
+        switch (sojourn_type[i]) {
+
+        case SEMI_MARKOVIAN : {
+          sum += state_process->sojourn_time[i]->mean;
+          break;
+        }
+
+        case MARKOVIAN : {
+          if (transition[i][i] < 1.) {
+            sum += 1. / (1. - transition[i][i]);
+          }
+          break;
+        }
+        }
+      }
+    }
+    min_time = (int)sum + 1;
+
+    i = 1;
+
+    do {
+
+      // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state and
+      // update of the probability of not visiting the selected state
+
+      sum = 0.;
+
+      for (j = 0;j < nb_state;j++) {
+        if ((j != state) && (accessibility[j][state])) {
+          switch (sojourn_type[j]) {
+
+          // case semi-Markovian state
+
+          case SEMI_MARKOVIAN : {
+            occupancy = state_process->sojourn_time[j];
+            state_out[j] = 0.;
+
+            for (k = 1;k <= MIN(i , occupancy->nb_value - 1);k++) {
+              if (k < i) {
+                state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+              }
+              else {
+                state_out[j] += occupancy->mass[k] * initial[j];
+              }
+            }
+            break;
+          }
+
+          // case Markovian state
+
+          case MARKOVIAN : {
+            if (i == 1) {
+              state_out[j] = initial[j];
+            }
+            else {
+              state_out[j] = state_in[i - 1][j];
+            }
+            break;
+          }
+          }
+
+          if ((transition[j][j] == 0.) || (transition[j][j] == 1.)) {
+            sum += state_out[j];
+          }
+          else {
+            sum += state_out[j] * (1. - transition[j][j]);
+          }
+
+          // transitions towards states from which the selected state can no longer be reached
+
+          for (k = 0;k < nb_state;k++) {
+            if ((k != state) && (!accessibility[k][state])) {
+              no_occurrence += transition[j][k] * state_out[j];
+            }
+          }
+        }
+      }
+
+      for (j = 0;j < nb_state;j++) {
+        if ((j != state) && (accessibility[j][state])) {
+          state_in[i][j] = 0.;
+          for (k = 0;k < nb_state;k++) {
+            if ((k != state) && (accessibility[k][state])) {
+              state_in[i][j] += transition[k][j] * state_out[k];
+            }
+          }
+        }
+      }
+
+      i++;
+    }
+    while (((sum > increment) || (i <= min_time)) && (i < LEAVE_LENGTH));
+
+    delete [] state_out;
+
+    for (i = 1;i < LEAVE_LENGTH;i++) {
+      delete [] state_in[i];
+    }
+    delete [] state_in;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the distribution of the time to the 1st occurrence of a state
+ *        for a semi-Markov chain.
+ *
+ * \param[in] state           state,
+ * \param[in] min_nb_value    minimum number of values,
+ * \param[in] cumul_threshold threshold on the cumulative distribution function.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::state_first_occurrence_distribution(int state , int min_nb_value ,
+                                                          double cumul_threshold)
+
+{
+  int i , j , k;
+  double *state_out , **state_in , *pmass , *pcumul;
+  DiscreteParametric *occupancy;
+  Distribution *first_occurrence;
+
+
+  // the distribution is filled in place; its complement is the probability of not visiting the state
+
+  first_occurrence = state_process->first_occurrence[state];
+  first_occurrence->complement = state_process->no_occurrence[state];
+
+  pmass = first_occurrence->mass;
+  pcumul = first_occurrence->cumul;
+
+  state_out = new double[nb_state];
+
+  // row 0 is never used: the time index starts at 1
+
+  state_in = new double*[first_occurrence->alloc_nb_value];
+  state_in[0] = NULL;
+  for (i = 1;i < first_occurrence->alloc_nb_value;i++) {
+    state_in[i] = new double[nb_state];
+  }
+
+  // probability mass for 0: the selected state is the initial state
+
+  *pmass = initial[state];
+  *pcumul = *pmass;
+
+  i = 1;
+
+  while (((*pcumul < cumul_threshold - first_occurrence->complement) || (i < min_nb_value)) &&
+         (i < first_occurrence->alloc_nb_value)) {
+
+    // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state and of
+    // the current probability mass
+
+    *++pmass = 0.;
+
+    for (j = 0;j < nb_state;j++) {
+      if (j != state) {
+        switch (sojourn_type[j]) {
+
+        // case semi-Markovian state
+
+        case SEMI_MARKOVIAN : {
+          occupancy = state_process->sojourn_time[j];
+          state_out[j] = 0.;
+
+          for (k = 1;k <= MIN(i , occupancy->nb_value - 1);k++) {
+            if (k < i) {
+              state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+            }
+            else {
+              switch (type) {
+              case ORDINARY :
+                state_out[j] += occupancy->mass[k] * initial[j];
+                break;
+              case EQUILIBRIUM :
+                state_out[j] += forward[j]->mass[k] * initial[j];
+                break;
+              }
+            }
+          }
+          break;
+        }
+
+        // case Markovian state
+
+        case MARKOVIAN : {
+          if (i == 1) {
+            state_out[j] = initial[j];
+          }
+          else {
+            state_out[j] = state_in[i - 1][j];
+          }
+          break;
+        }
+        }
+
+        *pmass += transition[j][state] * state_out[j];
+      }
+    }
+
+    // the selected state is excluded from the propagation: only paths avoiding it are followed
+
+    for (j = 0;j < nb_state;j++) {
+      if (j != state) {
+        state_in[i][j] = 0.;
+        for (k = 0;k < nb_state;k++) {
+          if (k != state) {
+            state_in[i][j] += transition[k][j] * state_out[k];
+          }
+        }
+      }
+    }
+
+    // update of the cumulative distribution function
+
+    pcumul++;
+    *pcumul = *(pcumul - 1) + *pmass;
+    i++;
+  }
+
+  first_occurrence->nb_value = i;
+
+  first_occurrence->offset_computation();
+  first_occurrence->max_computation();
+  first_occurrence->mean_computation();
+  first_occurrence->variance_computation();
+
+  delete [] state_out;
+
+  for (i = 1;i < first_occurrence->alloc_nb_value;i++) {
+    delete [] state_in[i];
+  }
+  delete [] state_in;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the probability of leaving definitively a state
+ *        for an ordinary semi-Markov chain.
+ *
+ * Nothing is computed (state_process->leave[state] is left unchanged) when
+ * the selected state is not transient.
+ *
+ * \param[in] state     state,
+ * \param[in] increment threshold on the sum of the probabilities of leaving a state.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::state_leave_probability(int state , double increment)
+
+{
+  if (stype[state] == TRANSIENT) {
+    int i , j , k;
+    int min_time;
+    double sum , *state_out , **state_in , &leave = state_process->leave[state];
+    DiscreteParametric *occupancy;
+
+
+    state_out = new double[nb_state];
+
+    // rows 0 and 1 are never used: the time index starts at 2
+
+    state_in = new double*[LEAVE_LENGTH];
+    state_in[0] = NULL;
+    state_in[1] = NULL;
+    for (i = 2;i < LEAVE_LENGTH;i++) {
+      state_in[i] = new double[nb_state];
+    }
+
+    // direct transitions towards states from which the selected state cannot be reached
+
+    leave = 0.;
+    for (i = 0;i < nb_state;i++) {
+      if ((i != state) && (!accessibility[i][state])) {
+        leave += transition[state][i];
+      }
+    }
+
+    // minimum number of iterations: sum of the mean sojourn times in the other states
+
+    sum = 0.;
+    for (i = 0;i < nb_state;i++) {
+      if (i != state) {
+        switch (sojourn_type[i]) {
+        case SEMI_MARKOVIAN :
+          sum += state_process->sojourn_time[i]->mean;
+          break;
+        case MARKOVIAN :
+
+          // an absorbing state (self-transition probability 1) has no finite mean sojourn time:
+          // it is skipped, as in state_no_occurrence_probability(), instead of dividing by zero
+          // and converting an infinite value to int
+
+          if (transition[i][i] < 1.) {
+            sum += 1. / (1. - transition[i][i]);
+          }
+          break;
+        }
+      }
+    }
+    min_time = (int)sum + 1;
+
+    i = 2;
+
+    do {
+
+      // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state and
+      // update of the probability of leaving definitively the selected state
+
+      sum = 0.;
+
+      for (j = 0;j < nb_state;j++) {
+        if ((j != state) && (accessibility[j][state])) {
+          switch (sojourn_type[j]) {
+
+          // case semi-Markovian state
+
+          case SEMI_MARKOVIAN : {
+            occupancy = state_process->sojourn_time[j];
+            state_out[j] = 0.;
+
+            for (k = 1;k < MIN(i , occupancy->nb_value);k++) {
+              if (k < i - 1) {
+                state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+              }
+              else {
+
+                // k == i - 1: state j was entered directly from the selected state
+
+                state_out[j] += occupancy->mass[k] * transition[state][j];
+              }
+            }
+            break;
+          }
+
+          // case Markovian state
+
+          case MARKOVIAN : {
+            if (i == 2) {
+              state_out[j] = transition[state][j];
+            }
+            else {
+              state_out[j] = state_in[i - 1][j];
+            }
+            break;
+          }
+          }
+
+          switch (sojourn_type[j]) {
+          case SEMI_MARKOVIAN :
+            sum += state_out[j];
+            break;
+          case MARKOVIAN :
+            sum += state_out[j] * (1. - transition[j][j]);
+            break;
+          }
+
+          for (k = 0;k < nb_state;k++) {
+            if ((k != state) && (!accessibility[k][state])) {
+              leave += transition[j][k] * state_out[j];
+            }
+          }
+        }
+      }
+
+      if (transition[state][state] > 0.) {
+        sum /= (1. - transition[state][state]);
+      }
+
+      for (j = 0;j < nb_state;j++) {
+        if ((j != state) && (accessibility[j][state])) {
+          state_in[i][j] = 0.;
+          for (k = 0;k < nb_state;k++) {
+            if ((k != state) && (accessibility[k][state])) {
+              state_in[i][j] += transition[k][j] * state_out[k];
+            }
+          }
+        }
+      }
+
+      i++;
+    }
+    while (((sum > increment) || (i <= min_time)) && (i < LEAVE_LENGTH));
+
+    if (sojourn_type[state] == SEMI_MARKOVIAN) {
+      leave /= state_process->sojourn_time[state]->parametric_mean_computation();
+    }
+
+    delete [] state_out;
+
+    for (i = 2;i < LEAVE_LENGTH;i++) {
+      delete [] state_in[i];
+    }
+    delete [] state_in;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the distribution of the recurrence time in a state
+ *        for a semi-Markov chain.
+ *
+ * state_process->recurrence_time[state] is deleted and set to NULL when
+ * the computed distribution has no value.
+ *
+ * \param[in] state           state,
+ * \param[in] min_nb_value    minimum number of values,
+ * \param[in] cumul_threshold threshold on the cumulative distribution function.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkovChain::state_recurrence_time_distribution(int state , int min_nb_value ,
+                                                         double cumul_threshold)
+
+{
+  int i , j , k;
+  double occupancy_mean , *state_out , **state_in , *pmass , *pcumul;
+  Distribution *recurrence_time;
+  DiscreteParametric *occupancy;
+
+
+  // the distribution is filled in place; its complement is the probability of leaving
+  // definitively the state
+
+  recurrence_time = state_process->recurrence_time[state];
+  recurrence_time->complement = state_process->leave[state];
+
+  pmass = recurrence_time->mass;
+  pcumul = recurrence_time->cumul;
+  *pmass = 0.;
+  *pcumul = 0.;
+
+  state_out = new double[nb_state];
+
+  // rows 0 and 1 are never used: the time index starts at 2
+
+  state_in = new double*[recurrence_time->alloc_nb_value];
+  state_in[0] = NULL;
+  state_in[1] = NULL;
+  for (i = 2;i < recurrence_time->alloc_nb_value;i++) {
+    state_in[i] = new double[nb_state];
+  }
+
+  // computation of the probability mass for 1
+  // (occupancy_mean is assigned and read only for a semi-Markovian state)
+
+  switch (sojourn_type[state]) {
+  case SEMI_MARKOVIAN :
+    occupancy_mean = state_process->sojourn_time[state]->parametric_mean_computation();
+    *++pmass = (occupancy_mean - 1.) / occupancy_mean;
+    break;
+  case MARKOVIAN :
+    *++pmass = transition[state][state];
+    break;
+  }
+
+  *++pcumul = *pmass;
+
+  i = 2;
+
+  while (((*pcumul < cumul_threshold - recurrence_time->complement) || (i < min_nb_value)) &&
+         (i < recurrence_time->alloc_nb_value)) {
+
+    // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state and of
+    // the current probability mass
+
+    *++pmass = 0.;
+
+    for (j = 0;j < nb_state;j++) {
+      if (j != state) {
+        switch (sojourn_type[j]) {
+
+        // case semi-Markovian state
+
+        case SEMI_MARKOVIAN : {
+          occupancy = state_process->sojourn_time[j];
+          state_out[j] = 0.;
+
+          for (k = 1;k < MIN(i , occupancy->nb_value);k++) {
+            if (k < i - 1) {
+              state_out[j] += occupancy->mass[k] * state_in[i - k][j];
+            }
+            else {
+              state_out[j] += occupancy->mass[k] * transition[state][j];
+            }
+          }
+          break;
+        }
+
+        // case Markovian state
+
+        case MARKOVIAN : {
+          if (i == 2) {
+            state_out[j] = transition[state][j];
+          }
+          else {
+            state_out[j] = state_in[i - 1][j];
+          }
+          break;
+        }
+        }
+
+        *pmass += transition[j][state] * state_out[j];
+      }
+    }
+
+    for (j = 0;j < nb_state;j++) {
+      if (j != state) {
+        state_in[i][j] = 0.;
+        for (k = 0;k < nb_state;k++) {
+          if (k != state) {
+            state_in[i][j] += transition[k][j] * state_out[k];
+          }
+        }
+      }
+    }
+
+    if (sojourn_type[state] == SEMI_MARKOVIAN) {
+      *pmass /= occupancy_mean;
+    }
+    pcumul++;
+    *pcumul = *(pcumul - 1) + *pmass;
+    i++;
+  }
+
+  recurrence_time->nb_value = i;
+  recurrence_time->nb_value_computation();
+
+  delete [] state_out;
+
+  for (i = 2;i < recurrence_time->alloc_nb_value;i++) {
+    delete [] state_in[i];
+  }
+  delete [] state_in;
+
+  if (recurrence_time->nb_value > 0) {
+    recurrence_time->offset_computation();
+    recurrence_time->max_computation();
+    recurrence_time->mean_computation();
+    recurrence_time->variance_computation();
+  }
+
+  else {
+    delete state_process->recurrence_time[state];
+    state_process->recurrence_time[state] = NULL;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the observation probabilities as a function of
+ *        the index parameter for a hidden semi-Markov chain.
+ *
+ * \param[in] variable observation process index.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkov::index_output_distribution(int variable)
+
+{
+  int i , j , k;
+  Curves *index_state , *index_value;
+
+
+  index_value = categorical_process[variable]->index_value;
+
+  // computation of the state probabilities
+  // (only once: the curves are kept in state_process->index_value)
+
+  if (!(state_process->index_value)) {
+    state_process->index_value = new Curves(nb_state , index_value->length);
+    index_state_distribution();
+  }
+  index_state = state_process->index_value;
+
+  // incorporation of the observation probabilities
+
+  for (i = 0;i < index_value->length;i++) {
+    for (j = 0;j < categorical_process[variable]->nb_value;j++) {
+      index_value->point[j][i] = 0.;
+      for (k = 0;k < nb_state;k++) {
+        index_value->point[j][i] += categorical_process[variable]->observation[k]->mass[j] *
+                                    index_state->point[k][i];
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the probability of not observing a value
+ *        for a hidden ordinary semi-Markov chain.
+ *
+ * \param[in] variable  observation process index,
+ * \param[in] output    observation,
+ * \param[in] increment threshold on the sum of the probabilities of leaving a state.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkov::output_no_occurrence_probability(int variable , int output ,
+                                                  double increment)
+
+{
+  bool status = false , *output_accessibility;
+  int i , j , k;
+  int min_time;
+  double sum , *state_out , **state_in , *observation , **obs_power ,
+         &no_occurrence = categorical_process[variable]->no_occurrence[output];
+  DiscreteParametric *occupancy;
+
+
+  // probability of the selected observation in each state
+
+  observation = new double[nb_state];
+  for (i = 0;i < nb_state;i++) {
+    observation[i] = categorical_process[variable]->observation[i]->mass[output];
+  }
+
+  // computation of the accessibility of the selected observation from a given state
+
+  output_accessibility = new bool[nb_state];
+
+  for (i = 0;i < nb_state;i++) {
+    output_accessibility[i] = false;
+
+    for (j = 0;j < nb_state;j++) {
+      if (j == i) {
+        if (observation[j] > 0.) {
+          output_accessibility[i] = true;
+          break;
+        }
+      }
+
+      else {
+        if ((accessibility[i][j]) && (observation[j] > 0.)) {
+          output_accessibility[i] = true;
+          break;
+        }
+      }
+    }
+
+    // status: there is at least one state from which the observation cannot be emitted
+
+    if (!output_accessibility[i]) {
+      status = true;
+    }
+  }
+
+  // nothing is computed (no_occurrence is left unchanged) when the observation
+  // is accessible from every state
+
+  if (status) {
+
+    // obs_power[i][k] = (1. - observation[i])^k, allocated for the semi-Markovian states only
+
+    obs_power = new double*[nb_state];
+    for (i = 0;i < nb_state;i++) {
+      if (sojourn_type[i] == SEMI_MARKOVIAN) {
+        obs_power[i] = new double[LEAVE_LENGTH + 1];
+        obs_power[i][0] = 1.;
+      }
+    }
+
+    state_out = new double[nb_state];
+
+    state_in = new double*[LEAVE_LENGTH];
+    for (i = 0;i < LEAVE_LENGTH;i++) {
+      state_in[i] = new double[nb_state];
+    }
+
+    no_occurrence = 0.;
+    for (i = 0;i < nb_state;i++) {
+      if (!output_accessibility[i]) {
+        no_occurrence += initial[i];
+      }
+    }
+
+    // minimum number of iterations: sum of the mean sojourn times
+    // (absorbing Markovian states are skipped)
+
+    sum = 0.;
+    for (i = 0;i < nb_state;i++) {
+      switch (sojourn_type[i]) {
+
+      case SEMI_MARKOVIAN : {
+        sum += state_process->sojourn_time[i]->mean;
+        break;
+      }
+
+      case MARKOVIAN : {
+        if (transition[i][i] < 1.) {
+          sum += 1. / (1. - transition[i][i]);
+        }
+        break;
+      }
+      }
+    }
+    min_time = (int)sum + 1;
+
+    i = 0;
+
+    do {
+
+      // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state and
+      // update of the probability of not observing the selected observation
+
+      sum = 0.;
+
+      for (j = 0;j < nb_state;j++) {
+        if (output_accessibility[j]) {
+          switch (sojourn_type[j]) {
+
+          // case semi-Markovian state
+
+          case SEMI_MARKOVIAN : {
+            occupancy = state_process->sojourn_time[j];
+            state_out[j] = 0.;
+
+            // computation of the powers of the observation probabilities
+
+            obs_power[j][i + 1] = obs_power[j][i] * (1. - observation[j]);
+
+            for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) {
+              if (k < i + 1) {
+                state_out[j] += obs_power[j][k] * occupancy->mass[k] * state_in[i - k][j];
+              }
+              else {
+                state_out[j] += obs_power[j][k] * occupancy->mass[k] * initial[j];
+              }
+            }
+            break;
+          }
+
+          // case Markovian state
+
+          case MARKOVIAN : {
+            if (i == 0) {
+              state_out[j] = (1. - observation[j]) * initial[j];
+            }
+            else {
+              state_out[j] = (1. - observation[j]) * state_in[i - 1][j];
+            }
+            break;
+          }
+          }
+
+          if ((transition[j][j] == 0.) || (transition[j][j] == 1.)) {
+            sum += state_out[j];
+          }
+          else {
+            sum += state_out[j] * (1. - transition[j][j]);
+          }
+
+          // transitions towards states from which the observation can no longer be emitted
+
+          for (k = 0;k < nb_state;k++) {
+            if (!output_accessibility[k]) {
+              no_occurrence += transition[j][k] * state_out[j];
+            }
+          }
+        }
+      }
+
+      for (j = 0;j < nb_state;j++) {
+        if (output_accessibility[j]) {
+          state_in[i][j] = 0.;
+          for (k = 0;k < nb_state;k++) {
+            if (output_accessibility[k]) {
+              state_in[i][j] += transition[k][j] * state_out[k];
+            }
+          }
+        }
+      }
+
+      i++;
+    }
+    while (((sum > increment) || (i < min_time)) && (i < LEAVE_LENGTH));
+
+    for (i = 0;i < nb_state;i++) {
+      if (sojourn_type[i] == SEMI_MARKOVIAN) {
+        delete [] obs_power[i];
+      }
+    }
+    delete [] obs_power;
+
+    delete [] state_out;
+
+    for (i = 0;i < LEAVE_LENGTH;i++) {
+      delete [] state_in[i];
+    }
+    delete [] state_in;
+  }
+
+  delete [] observation;
+  delete [] output_accessibility;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Computation of the distribution of the time to the 1st occurrence of
+ *        a categorical observation for a hidden semi-Markov chain.
+ *
+ * \param[in] variable        observation process index,
+ * \param[in] output          observation,
+ * \param[in] min_nb_value    minimum number of values,
+ * \param[in] cumul_threshold threshold on the cumulative distribution function.
+ */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::output_first_occurrence_distribution(int variable , int output , + int min_nb_value , double cumul_threshold) + +{ + int i , j , k; + double sum , *state_out , **state_in , *observation , **obs_power , *pmass , *pcumul; + DiscreteParametric *occupancy; + Distribution *first_occurrence; + + + first_occurrence = categorical_process[variable]->first_occurrence[output]; + first_occurrence->complement = categorical_process[variable]->no_occurrence[output]; + + pmass = first_occurrence->mass - 1; + pcumul = first_occurrence->cumul - 1; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + obs_power = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + obs_power[i] = new double[first_occurrence->alloc_nb_value + 1]; + obs_power[i][0] = 1.; + } + } + + state_out = new double[nb_state]; + + state_in = new double*[first_occurrence->alloc_nb_value]; + for (i = 0;i < first_occurrence->alloc_nb_value;i++) { + state_in[i] = new double[nb_state]; + } + + i = 0; + + do { + + // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state and of + // the current probability mass + + *++pmass = 0.; + + for (j = 0;j < nb_state;j++) { + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + state_out[j] = 0.; + sum = 0.; + + // computation of the powers of the observation probabilities + + obs_power[j][i + 1] = obs_power[j][i] * (1. - observation[j]); + + for (k = 1;k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + if (k < i + 1) { + state_out[j] += obs_power[j][k] * occupancy->mass[k] * state_in[i - k][j]; + sum += obs_power[j][k - 1] * (1. 
- occupancy->cumul[k - 1]) * state_in[i - k][j]; + } + else { + switch (type) { + case ORDINARY : + state_out[j] += obs_power[j][k] * occupancy->mass[k] * initial[j]; + sum += obs_power[j][k - 1] * (1. - occupancy->cumul[k - 1]) * initial[j]; + break; + case EQUILIBRIUM : + state_out[j] += obs_power[j][k] * forward[j]->mass[k] * initial[j]; + sum += obs_power[j][k - 1] * (1. - forward[j]->cumul[k - 1]) * initial[j]; + break; + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (i == 0) { + state_out[j] = (1. - observation[j]) * initial[j]; + sum = initial[j]; + } + else { + state_out[j] = (1. - observation[j]) * state_in[i - 1][j]; + sum = state_in[i - 1][j]; + } + break; + } + } + + *pmass += observation[j] * sum; + } + + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + state_in[i][j] += transition[k][j] * state_out[k]; + } + } + + // update of the cumulative distribution function + + pcumul++; + if (i == 0) { + *pcumul = *pmass; + } + else { + *pcumul = *(pcumul - 1) + *pmass; + } + i++; + } + while (((*pcumul < cumul_threshold - first_occurrence->complement) || (i < min_nb_value)) && + (i < first_occurrence->alloc_nb_value)); + + first_occurrence->nb_value = i; + + first_occurrence->offset_computation(); + first_occurrence->max_computation(); + first_occurrence->mean_computation(); + first_occurrence->variance_computation(); + + delete [] observation; + + for (i = 0;i < nb_state;i++) { + if (sojourn_type[i] == SEMI_MARKOVIAN) { + delete [] obs_power[i]; + } + } + delete [] obs_power; + + delete [] state_out; + + for (i = 0;i < first_occurrence->alloc_nb_value;i++) { + delete [] state_in[i]; + } + delete [] state_in; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probability of leaving definitively a categorical observation + * for a hidden ordinary semi-Markov chain. 
+ * + * \param[in] memory memory distribution, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] increment threshold on the sum of the probabilities of leaving a state. + */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::output_leave_probability(const double *memory , int variable , + int output , double increment) + +{ + bool status = false , *output_accessibility; + int i , j , k; + int min_time; + double sum0 , sum1 , *observation , **obs_power , *input_proba , *state_out , + **state_in , &leave = categorical_process[variable]->leave[output]; + DiscreteParametric *occupancy; + + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + // computation of the accessibility of the selected observation from a given state + + output_accessibility = new bool[nb_state]; + + for (i = 0;i < nb_state;i++) { + output_accessibility[i] = false; + + for (j = 0;j < nb_state;j++) { + if (j == i) { + if (observation[j] > 0.) { + output_accessibility[i] = true; + break; + } + } + + else { + if ((accessibility[i][j]) && (observation[j] > 0.)) { + output_accessibility[i] = true; + break; + } + } + } + + if (!output_accessibility[i]) { + status = true; + } + } + + if (status) { + obs_power = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) 
{ + obs_power[i] = new double[LEAVE_LENGTH]; + obs_power[i][0] = 1.; + } + } + + state_out = new double[nb_state]; + + state_in = new double*[LEAVE_LENGTH]; + state_in[0] = NULL; + for (i = 1;i < LEAVE_LENGTH;i++) { + state_in[i] = new double[nb_state]; + } + + // computation of the entering and exit probabilities + + input_proba = new double[nb_state]; + sum0 = 0.; + + for (i = 0;i < nb_state;i++) { + sum1 = 0.; + for (j = 0;j < nb_state;j++) { + if ((i != j) || (transition[j][j] == 1.)) { + sum1 += transition[j][i] * memory[j]; + } + } + input_proba[i] = observation[i] * (initial[i] + sum1); + + // case non-absorbing state + + if (transition[i][i] < 1.) { + switch (sojourn_type[i]) { + case SEMI_MARKOVIAN : + sum0 += state_process->sojourn_time[i]->mean * input_proba[i]; + break; + case MARKOVIAN : + sum0 += input_proba[i] / (1. - transition[i][i]); + break; + } + } + + // case absorbing state + + else { + sum0 += input_proba[i]; + } + } + + for (i = 0;i < nb_state;i++) { + input_proba[i] /= sum0; + } + + sum0 = 0.; + for (i = 0;i < nb_state;i++) { + switch (sojourn_type[i]) { + + case SEMI_MARKOVIAN : { + sum0 += state_process->sojourn_time[i]->mean; + break; + } + + case MARKOVIAN : { + if (transition[i][i] < 1.) { + sum0 += 1. / (1. - transition[i][i]); + } + break; + } + } + } + min_time = (int)sum0 + 1; + + leave = 0.; + i = 1; + + do { + + // computation of the probabilities of leaving a state and update of + // the probability of leaving definitively the selected observation + + sum0 = 0.; + + for (j = 0;j < nb_state;j++) { + if (output_accessibility[j]) { + state_out[j] = 0.; + + // case non-absorbing state + + if (transition[j][j] < 1.) { + occupancy = state_process->sojourn_time[j]; + + // computation of the powers of the observation probabilities + + obs_power[j][i] = obs_power[j][i - 1] * (1. 
- observation[j]); + + for (k = 1;k <= MIN(i , occupancy->nb_value - 1);k++) { + if (k < i) { + state_out[j] += obs_power[j][k] * occupancy->mass[k] * state_in[i - k][j]; + } + else { + state_out[j] += obs_power[j][k - 1] * (1. - occupancy->cumul[k - 1]) * + input_proba[j]; + } + } + + sum0 += state_out[j]; + + switch (sojourn_type[j]) { + + case SEMI_MARKOVIAN : { + for (k = 0;k < nb_state;k++) { + if (!output_accessibility[k]) { + leave += transition[j][k] * state_out[j]; + } + } + break; + } + + case MARKOVIAN : { + for (k = 0;k < nb_state;k++) { + if ((!output_accessibility[k]) && (k != j)) { + leave += transition[j][k] * state_out[j] / (1. - transition[j][j]); + } + } + break; + } + } + } + } + } + + for (j = 0;j < nb_state;j++) { + if (output_accessibility[j]) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + if (output_accessibility[k]) { + if ((transition[k][k] == 0.) || (transition[k][k] == 1.)) { + state_in[i][j] += transition[k][j] * state_out[k]; + } + else if (j != k) { + state_in[i][j] += transition[k][j] * state_out[k] / (1. - transition[k][k]); + } + } + } + } + } + + i++; + } + while (((sum0 > increment) || (i <= min_time)) && (i < LEAVE_LENGTH)); + + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) { + delete [] obs_power[i]; + } + } + delete [] obs_power; + + delete [] state_out; + + for (i = 1;i < LEAVE_LENGTH;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] input_proba; + } + + delete [] observation; + delete [] output_accessibility; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the recurrence time in a categorical observation + * for a hidden semi-Markov chain. 
+ * + * \param[in] memory memory distribution, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. + */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::output_recurrence_time_distribution(const double *memory , int variable , + int output , int min_nb_value , + double cumul_threshold) + +{ + int i , j , k , m; + double sum0 , sum1 , *observation , **obs_power , *input_proba , *output_proba , + *state_out , **state_in , *pmass , *pcumul; + Distribution *recurrence_time; + DiscreteParametric *occupancy; + + + recurrence_time = categorical_process[variable]->recurrence_time[output]; + recurrence_time->complement = categorical_process[variable]->leave[output]; + + pmass = recurrence_time->mass; + pcumul = recurrence_time->cumul; + *pmass = 0.; + *pcumul = 0.; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + obs_power = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) { + obs_power[i] = new double[recurrence_time->alloc_nb_value]; + obs_power[i][0] = 1.; + } + } + + state_out = new double[nb_state]; + + state_in = new double*[recurrence_time->alloc_nb_value]; + state_in[0] = NULL; + for (i = 1;i < recurrence_time->alloc_nb_value;i++) { + state_in[i] = new double[nb_state]; + } + + // computation of the entering and exit probabilities + + input_proba = new double[nb_state]; + output_proba = new double[nb_state]; + sum0 = 0.; + + for (i = 0;i < nb_state;i++) { + sum1 = 0.; + for (j = 0;j < nb_state;j++) { + if ((i != j) || (transition[j][j] == 1.)) { + sum1 += transition[j][i] * memory[j]; + } + } + input_proba[i] = observation[i] * (initial[i] + sum1); + + // case non-absorbing state + + if (transition[i][i] < 1.) 
{ + switch (sojourn_type[i]) { + case SEMI_MARKOVIAN : + sum0 += state_process->sojourn_time[i]->mean * input_proba[i]; + break; + case MARKOVIAN : + sum0 += input_proba[i] / (1. - transition[i][i]); + break; + } + + sum1 = 0.; + + switch (sojourn_type[i]) { + + case SEMI_MARKOVIAN : { + for (j = 0;j < nb_state;j++) { + sum1 += observation[j] * transition[i][j]; + } + break; + } + + case MARKOVIAN : { + for (j = 0;j < nb_state;j++) { + if (j != i) { + sum1 += observation[j] * transition[i][j] / (1. - transition[i][i]); + } + } + break; + } + } + + output_proba[i] = sum1; + } + + // case absorbing state + + else { + sum0 += input_proba[i]; + } + } + + for (i = 0;i < nb_state;i++) { + input_proba[i] /= sum0; + } + + i = 1; + + do { + + // computation of the probabilities of leaving a state and of the current probability mass + + *++pmass = 0.; + + for (j = 0;j < nb_state;j++) { + + // case non-absorbing state + + if (transition[j][j] < 1.) { + occupancy = state_process->sojourn_time[j]; + state_out[j] = 0.; + sum0 = 0.; + + // computation of the powers of the observation probabilities + + obs_power[j][i] = obs_power[j][i - 1] * (1. - observation[j]); + + for (k = 1;k <= MIN(i , occupancy->nb_value - 1);k++) { + if (k < i) { + state_out[j] += obs_power[j][k] * occupancy->mass[k] * state_in[i - k][j]; + sum0 += obs_power[j][k] * (1. - occupancy->cumul[k]) * state_in[i - k][j]; + } + + else { + state_out[j] += obs_power[j][k - 1] * (1. - occupancy->cumul[k - 1]) * input_proba[j]; + + sum1 = 0.; + for (m = k;m < occupancy->nb_value;m++) { + sum1 += (1. - occupancy->cumul[m]); + } + sum0 += obs_power[j][k - 1] * sum1 * input_proba[j]; + } + } + + *pmass += output_proba[j] * state_out[j] + observation[j] * sum0; + } + + // case absorbing state + + else { + if (i == 1) { + state_out[j] = input_proba[j]; + } + else { + state_out[j] = (1. 
- observation[j]) * state_in[i - 1][j]; + } + + *pmass += observation[j] * state_out[j]; + } + } + + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + if ((transition[k][k] == 0.) || (transition[k][k] == 1.)) { + state_in[i][j] += transition[k][j] * state_out[k]; + } + else if (j != k) { + state_in[i][j] += transition[k][j] * state_out[k] / (1. - transition[k][k]); + } + } + } + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + while (((*pcumul < cumul_threshold - recurrence_time->complement) || (i < min_nb_value)) && + (i < recurrence_time->alloc_nb_value)); + + recurrence_time->nb_value = i; + recurrence_time->nb_value_computation(); + + if (recurrence_time->nb_value > 0) { + recurrence_time->offset_computation(); + recurrence_time->max_computation(); + recurrence_time->mean_computation(); + recurrence_time->variance_computation(); + } + + else { + delete categorical_process[variable]->recurrence_time[output]; + categorical_process[variable]->recurrence_time[output] = NULL; + } + + delete [] observation; + + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) { + delete [] obs_power[i]; + } + } + delete [] obs_power; + + delete [] state_out; + + for (i = 1;i < recurrence_time->alloc_nb_value;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + delete [] input_proba; + delete [] output_proba; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the sojourn time in a categorical observation + * for a hidden semi-Markov chain. + * + * \param[in] memory memory distribution, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. 
+ */ +/*--------------------------------------------------------------*/ + +void SemiMarkov::output_sojourn_time_distribution(const double *memory , int variable , + int output , int min_nb_value , + double cumul_threshold) + +{ + int i , j , k , m; + double sum0 , sum1 , *observation , **obs_power , **input_proba , + *output_proba , *state_out , **state_in , *pmass , *pcumul , + &absorption = categorical_process[variable]->absorption[output]; + DiscreteParametric *sojourn_time , *occupancy; + + + sojourn_time = categorical_process[variable]->sojourn_time[output]; + + pmass = sojourn_time->mass; + pcumul = sojourn_time->cumul; + *pmass = 0.; + *pcumul = 0.; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + obs_power = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) { + obs_power[i] = new double[sojourn_time->alloc_nb_value]; + obs_power[i][0] = 1.; + } + } + + state_out = new double[nb_state]; + + state_in = new double*[sojourn_time->alloc_nb_value]; + state_in[0] = NULL; + for (i = 1;i < sojourn_time->alloc_nb_value;i++) { + state_in[i] = new double[nb_state]; + } + + // computation of the entering and exit probabilities + + input_proba = new double*[nb_state]; + output_proba = new double[nb_state]; + sum0 = 0.; + + for (i = 0;i < nb_state;i++) { + input_proba[i] = new double[2]; + + sum1 = 0.; + for (j = 0;j < nb_state;j++) { + if ((i != j) || (transition[j][j] == 1.)) { + sum1 += transition[j][i] * (1. - observation[j]) * memory[j]; + } + } + input_proba[i][0] = observation[i] * (initial[i] + sum1); + sum0 += input_proba[i][0]; + + // case non-absorbing state + + if (transition[i][i] < 1.) { + sum1 = 0.; + for (j = 0;j < nb_state;j++) { + if ((i != j) || (transition[j][j] == 1.)) { + sum1 += transition[j][i] * memory[j]; + } + } + input_proba[i][1] = observation[i] * (1. 
- observation[i]) * (initial[i] + sum1); + + switch (sojourn_type[i]) { + case SEMI_MARKOVIAN : + sum0 += (state_process->sojourn_time[i]->mean - 1) * input_proba[i][1]; + break; + case MARKOVIAN : + sum0 += (1. / (1. - transition[i][i]) - 1) * input_proba[i][1]; + break; + } + + sum1 = 0.; + + switch (sojourn_type[i]) { + + case SEMI_MARKOVIAN : { + for (j = 0;j < nb_state;j++) { + sum1 += (1. - observation[j]) * transition[i][j]; + } + break; + } + + case MARKOVIAN : { + for (j = 0;j < nb_state;j++) { + if (j != i) { + sum1 += (1. - observation[j]) * transition[i][j] / (1. - transition[i][i]); + } + } + break; + } + } + + output_proba[i] = sum1; + } + } + + for (i = 0;i < nb_state;i++) { + input_proba[i][0] /= sum0; + if (transition[i][i] < 1.) { + input_proba[i][1] /= sum0; + } + } + + i = 1; + + do { + + // computation of the probabilities of leaving a state + + absorption = 0.; + *++pmass = 0.; + + for (j = 0;j < nb_state;j++) { + state_out[j] = 0.; + + if (observation[j] > 0.) { + + // case non-absorbing state + + if (transition[j][j] < 1.) { + occupancy = state_process->sojourn_time[j]; + sum0 = 0.; + + // computation of the powers of the observation probabilities + + obs_power[j][i] = obs_power[j][i - 1] * observation[j]; + + for (k = 1;k <= MIN(i , occupancy->nb_value - 1);k++) { + if (k < i) { + state_out[j] += obs_power[j][k] * occupancy->mass[k] * state_in[i - k][j]; + sum0 += obs_power[j][k] * (1. - occupancy->cumul[k]) * state_in[i - k][j]; + } + + else { + state_out[j] += obs_power[j][k - 1] * (occupancy->mass[k] * input_proba[j][0] + + (1. - occupancy->cumul[k]) * input_proba[j][1]); + + sum1 = 0.; + for (m = k + 1;m < occupancy->nb_value;m++) { + sum1 += (1. - occupancy->cumul[m]); + } + sum0 += obs_power[j][k - 1] * ((1. - occupancy->cumul[k]) * input_proba[j][0] + + sum1 * input_proba[j][1]); + } + } + + *pmass += output_proba[j] * state_out[j] + (1. 
- observation[j]) * sum0; + } + + // case absorbing state + + else { + if (i == 1) { + state_out[j] = input_proba[j][0]; + } + else { + state_out[j] = observation[j] * state_in[i - 1][j]; + } + + *pmass += (1. - observation[j]) * state_out[j]; + } + + if ((transition[j][j] == 0.) || (transition[j][j] == 1.)) { + for (k = 0;k < nb_state;k++) { + if ((stype[k] == ABSORBING) && (observation[k] == 1.)) { + absorption += transition[j][k] * state_out[j]; + } + } + } + + else { + for (k = 0;k < nb_state;k++) { + if ((stype[k] == ABSORBING) && (observation[k] == 1.) && (k != j)) { + absorption += transition[j][k] * state_out[j] / (1. - transition[j][j]); + } + } + } + } + } + + for (j = 0;j < nb_state;j++) { + state_in[i][j] = 0.; + for (k = 0;k < nb_state;k++) { + if ((transition[k][k] == 0.) || (transition[k][k] == 1.)) { + state_in[i][j] += transition[k][j] * state_out[k]; + } + else if (j != k) { + state_in[i][j] += transition[k][j] * state_out[k] / (1. - transition[k][k]); + } + } + } + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + while (((*pcumul < cumul_threshold - absorption) || (i < min_nb_value)) && + (i < sojourn_time->alloc_nb_value)); + + if (*pcumul > 1.) { + +# ifdef MESSAGE + cout << STAT_label[STATL_OUTPUT] << " " << output << ": CONVERGENCE ERROR" << endl; +# endif + + } + + if (*pcumul == 0.) { + absorption = 1.; + delete categorical_process[variable]->sojourn_time[output]; + categorical_process[variable]->sojourn_time[output] = NULL; + } + + else { + sojourn_time->nb_value = i; + sojourn_time->complement = absorption; + + sojourn_time->offset_computation(); + sojourn_time->max_computation(); + sojourn_time->mean_computation(); + sojourn_time->variance_computation(); + } + + delete [] observation; + + for (i = 0;i < nb_state;i++) { + if (transition[i][i] < 1.) 
{ + delete [] obs_power[i]; + } + } + delete [] obs_power; + + delete [] state_out; + + for (i = 1;i < sojourn_time->alloc_nb_value;i++) { + delete [] state_in[i]; + } + delete [] state_in; + + for (i = 0;i < nb_state;i++) { + delete [] input_proba[i]; + } + delete [] input_proba; + + delete [] output_proba; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/smc_distributions2.cpp b/src/cpp/sequence_analysis/smc_distributions2.cpp new file mode 100644 index 0000000..6e094db --- /dev/null +++ b/src/cpp/sequence_analysis/smc_distributions2.cpp @@ -0,0 +1,1297 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * ---------------------------------------------------------------------------- + */ + + + +#include "semi_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the mixture of the distributions of the number of runs (RUN) or + * occurrences (OCCURRENCE) of a state for a sequence length mixing distribution and + * a semi-Markov chain. + * + * \param[in] state state, + * \param[in] pattern count pattern type. + */ +/*--------------------------------------------------------------*/ + +void SemiMarkovChain::state_nb_pattern_mixture(int state , count_pattern pattern) + +{ + int i , j , k , m; + int max_length , index_nb_pattern , previous_nb_pattern , increment; + double sum , *pmass , *lmass , **state_out , *pstate_out , ***state_in; + Distribution *pdist; + DiscreteParametric *occupancy; + + + switch (pattern) { + case RUN : + pdist = state_process->nb_run[state]; + break; + case OCCURRENCE : + pdist = state_process->nb_occurrence[state]; + break; + } + + pmass = pdist->mass; + for (i = 0;i < pdist->nb_value;i++) { + *pmass++ = 0.; + } + + max_length = state_process->length->nb_value - 1; + + state_out = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + state_out[i] = new double[pattern == OCCURRENCE ? 
max_length : (max_length + 1) / 2 + 1]; + } + + state_in = new double**[max_length - 1]; + index_nb_pattern = 1; + + for (i = 0;i < max_length - 1;i++) { + state_in[i] = new double*[nb_state]; + for (j = 0;j < nb_state;j++) { + state_in[i][j] = new double[index_nb_pattern + 1]; + } + if ((pattern == OCCURRENCE) || (i % 2 == 1)) { + index_nb_pattern++; + } + } + + // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state as + // a function of the number of runs or occurrences of the selected state + + lmass = state_process->length->mass; + index_nb_pattern = 1; + + for (i = 0;i < max_length;i++) { + lmass++; + + for (j = 0;j < nb_state;j++) { + + // initialization of the probabilities of leaving a state at time i + + if (i < max_length - 1) { + pstate_out = state_out[j]; + for (k = 0;k <= index_nb_pattern;k++) { + *pstate_out++ = 0.; + } + } + + switch (sojourn_type[j]) { + + // case semi-Markovian state + + case SEMI_MARKOVIAN : { + occupancy = state_process->sojourn_time[j]; + + for (k = (*lmass > 0. ? 1 : occupancy->offset);k <= MIN(i + 1 , occupancy->nb_value - 1);k++) { + switch (pattern) { + case RUN : + increment = 1; + break; + case OCCURRENCE : + increment = k; + break; + } + + if (i < max_length - 1) { + pstate_out = state_out[j]; + if (j == state) { + pstate_out += increment; + } + } + if (*lmass > 0.) { + pmass = pdist->mass; + if (j == state) { + pmass += increment; + } + } + + if (k < i + 1) { + switch (pattern) { + + case RUN : { + if ((j == state) && (k == 1) && (i % 2 == 1)) { + previous_nb_pattern = index_nb_pattern - 1; + } + else { + previous_nb_pattern = (i - k) / 2 + 1; + } + break; + } + + case OCCURRENCE : { + previous_nb_pattern = i - k + 1; + break; + } + } + + if (i < max_length - 1) { + for (m = 0;m <= previous_nb_pattern;m++) { + *pstate_out++ += occupancy->mass[k] * state_in[i - k][j][m]; + } + } + if (*lmass > 0.) { + for (m = 0;m <= previous_nb_pattern;m++) { + *pmass++ += *lmass * (1. 
- occupancy->cumul[k - 1]) * state_in[i - k][j][m]; + } + } + } + + else { + if (i < max_length - 1) { + switch (type) { + case ORDINARY : + *pstate_out += occupancy->mass[k] * initial[j]; + break; + case EQUILIBRIUM : + *pstate_out += forward[j]->mass[k] * initial[j]; + break; + } + } + + if (*lmass > 0.) { + switch (type) { + case ORDINARY : + *pmass += *lmass * (1. - occupancy->cumul[k - 1]) * initial[j]; + break; + case EQUILIBRIUM : + *pmass += *lmass * (1. - forward[j]->cumul[k - 1]) * initial[j]; + break; + } + } + } + } + break; + } + + // case Markovian state + + case MARKOVIAN : { + if (i < max_length - 1) { + pstate_out = state_out[j]; + if (j == state) { + pstate_out++; + } + } + + if (*lmass > 0.) { + pmass = pdist->mass; + if (j == state) { + pmass++; + } + } + + if (i == 0) { + *pstate_out = initial[j]; + if (*lmass > 0.) { + *pmass += *lmass * initial[j]; + } + } + + else { + switch (pattern) { + + case RUN : { + if ((j == state) && (i % 2 == 1)) { + previous_nb_pattern = index_nb_pattern - 1; + } + else { + previous_nb_pattern = (i - 1) / 2 + 1; + } + break; + } + + case OCCURRENCE : { + previous_nb_pattern = i; + break; + } + } + + if (i < max_length - 1) { + for (k = 0;k <= previous_nb_pattern;k++) { + *pstate_out++ = state_in[i - 1][j][k]; + } + } + if (*lmass > 0.) 
{ + for (k = 0;k <= previous_nb_pattern;k++) { + *pmass++ += *lmass * state_in[i - 1][j][k]; + } + } + } + break; + } + } + } + + if (i < max_length - 1) { + for (j = 0;j < nb_state;j++) { + for (k = 0;k <= index_nb_pattern;k++) { + state_in[i][j][k] = 0.; + for (m = 0;m < nb_state;m++) { + if ((pattern == OCCURRENCE) || (j != state) || (j != m)) { + state_in[i][j][k] += transition[m][j] * state_out[m][k]; + } + else if (k < index_nb_pattern) { + state_in[i][j][k] += transition[m][j] * state_out[m][k + 1]; + } + } + } + } + } + + if ((pattern == OCCURRENCE) || (i % 2 == 1)) { + index_nb_pattern++; + } + } + + // renormalization of the mixture of the distributions of the number of runs or + // occurrences of the selected state for taking account of the thresholds applied on + // the cumulative state occupancy distribution functions + + pmass = pdist->mass; + sum = 0.; + for (i = 0;i < pdist->nb_value;i++) { + sum += *pmass++; + } + + if (sum < 1.) { + pmass = pdist->mass; + for (i = 0;i < pdist->nb_value;i++) { + *pmass++ /= sum; + } + } + + pdist->nb_value_computation(); + pdist->offset_computation(); + pdist->cumul_computation(); + + pdist->max_computation(); + pdist->mean_computation(); + pdist->variance_computation(); + + for (i = 0;i < nb_state;i++) { + delete [] state_out[i]; + state_out[i] = NULL; + } + delete [] state_out; + state_out = NULL; + + for (i = 0;i < max_length - 1;i++) { + for (j = 0;j < nb_state;j++) { + delete [] state_in[i][j]; + state_in[i][j] = NULL; + } + delete [] state_in[i]; + state_in[i] = NULL; + } + delete [] state_in; + state_in = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the mixture of the distributions of the number of runs of + * a categorical observation for a sequence length mixing distribution and + * a hidden semi-Markov chain. + * + * \param[in] variable observation process index, + * \param[in] output observation. 
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkov::output_nb_run_mixture(int variable , int output)
+
+{
+  int i , j , k , m , n;
+  int max_length , index_nb_pattern , min , max;
+  double sum0 , sum1 , *pmass , *lmass , **state_out , ***state_in , ***state_nb_run;
+  Distribution *nb_run;
+  DiscreteParametric *occupancy;
+
+
+  // the result is accumulated in place in the pre-allocated distribution, which is first reset
+
+  nb_run = categorical_process[variable]->nb_run[output];
+
+  pmass = nb_run->mass;
+  for (i = 0;i < nb_run->nb_value;i++) {
+    *pmass++ = 0.;
+  }
+
+  max_length = categorical_process[variable]->length->nb_value - 1;
+
+  // every state is split in two entries: index (state * 2) when the last emitted observation
+  // is not the selected one, index (state * 2 + 1) when it is the selected one
+
+  state_out = new double*[nb_state * 2];
+  for (i = 0;i < nb_state * 2;i++) {
+    state_out[i] = new double[(max_length + 1) / 2 + 1];
+  }
+
+  // the maximum number of runs grows by one every two time steps
+
+  state_in = new double**[max_length - 1];
+  index_nb_pattern = 1;
+
+  for (i = 0;i < max_length - 1;i++) {
+    state_in[i] = new double*[nb_state * 2];
+    for (j = 0;j < nb_state * 2;j++) {
+      state_in[i][j] = new double[index_nb_pattern + 1];
+    }
+    if (i % 2 == 1) {
+      index_nb_pattern++;
+    }
+  }
+
+  // computation of the distributions of the number of runs of the selected observation
+  // for the different times spent in a state taking account of the observation emitted
+  // before entering in the state
+
+  state_nb_run = new double**[nb_state * 2];
+
+  for (i = 0;i < nb_state * 2;i++) {
+    if (sojourn_type[i / 2] == SEMI_MARKOVIAN) {
+      occupancy = state_process->sojourn_time[i / 2];
+      state_nb_run[i] = new double*[MIN(max_length + 1 , occupancy->nb_value) * 2];
+
+      state_nb_run[i][0] = NULL;
+      state_nb_run[i][1] = NULL;
+      index_nb_pattern = 1;
+      for (j = 1;j < MIN(max_length + 1 , occupancy->nb_value);j++) {
+        state_nb_run[i][j * 2] = new double[index_nb_pattern + 1];
+        state_nb_run[i][j * 2 + 1] = new double[index_nb_pattern + 1];
+        if (j % 2 == 1) {
+          index_nb_pattern++;
+        }
+      }
+    }
+  }
+
+  for (i = 0;i < nb_state * 2;i++) {
+    if (sojourn_type[i / 2] == SEMI_MARKOVIAN) {
+
+      // sojourn of length 1: the two entries differ only by whether emitting the selected
+      // observation starts a new run (case 0) or continues the previous one (case 1)
+
+      switch (i % 2) {
+
+      case 0 : {
+        state_nb_run[i][2][0] = 1. - categorical_process[variable]->observation[i / 2]->mass[output];
+        state_nb_run[i][2][1] = 0.;
+        state_nb_run[i][3][0] = 0.;
+        state_nb_run[i][3][1] = categorical_process[variable]->observation[i / 2]->mass[output];
+        break;
+      }
+
+      case 1 : {
+        state_nb_run[i][2][0] = 1. - categorical_process[variable]->observation[i / 2]->mass[output];
+        state_nb_run[i][2][1] = 0.;
+        state_nb_run[i][3][0] = categorical_process[variable]->observation[i / 2]->mass[output];
+        state_nb_run[i][3][1] = 0.;
+        break;
+      }
+      }
+
+      occupancy = state_process->sojourn_time[i / 2];
+      index_nb_pattern = 1;
+
+      // recursion on the sojourn length j
+
+      for (j = 2;j < MIN(max_length + 1 , occupancy->nb_value);j++) {
+        for (k = 0;k <= index_nb_pattern;k++) {
+          state_nb_run[i][j * 2][k] = (1. - categorical_process[variable]->observation[i / 2]->mass[output]) *
+                                      (state_nb_run[i][j * 2 - 2][k] + state_nb_run[i][j * 2 - 1][k]);
+
+          sum0 = state_nb_run[i][j * 2 - 1][k];
+          if (k > 0) {
+            sum0 += state_nb_run[i][j * 2 - 2][k - 1];
+          }
+          state_nb_run[i][j * 2 + 1][k] = categorical_process[variable]->observation[i / 2]->mass[output] * sum0;
+        }
+
+        if (j % 2 == 0) {
+          index_nb_pattern++;
+          state_nb_run[i][j * 2][index_nb_pattern] = 0.;
+          state_nb_run[i][j * 2 + 1][index_nb_pattern] = 0.;
+        }
+      }
+    }
+  }
+
+  // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state as
+  // a function of the number of runs of the selected observation
+
+  lmass = categorical_process[variable]->length->mass;
+  index_nb_pattern = 1;
+
+  for (i = 0;i < max_length;i++) {
+    lmass++;
+
+    // initialization of the probabilities of leaving a state at time i
+
+    for (j = 0;j < nb_state * 2;j++) {
+      for (k = 0;k <= index_nb_pattern;k++) {
+        state_out[j][k] = 0.;
+      }
+    }
+
+    for (j = 0;j < nb_state;j++) {
+      switch (sojourn_type[j]) {
+
+      // case semi-Markovian state
+
+      case SEMI_MARKOVIAN : {
+        occupancy = state_process->sojourn_time[j];
+
+        for (k = (*lmass > 0. ? 1 : occupancy->offset);k <= MIN(i + 1 , occupancy->nb_value - 1);k++) {
+          if (*lmass > 0.) {
+            pmass = nb_run->mass;
+          }
+
+          for (m = 0;m <= index_nb_pattern;m++) {
+            if (k < i + 1) {
+              min = MAX(m - ((i - k) / 2 + 1) , 0);
+              max = MIN((k % 2 == 0 ? k / 2 : k / 2 + 1) , m);
+
+              if (max >= min) {
+                sum0 = 0.;
+                sum1 = 0.;
+                for (n = min;n <= max;n++) {
+                  sum0 += state_nb_run[j * 2][k * 2][n] * state_in[i - k][j * 2][m - n] +
+                          state_nb_run[j * 2 + 1][k * 2][n] * state_in[i - k][j * 2 + 1][m - n];
+                  sum1 += state_nb_run[j * 2][k * 2 + 1][n] * state_in[i - k][j * 2][m - n] +
+                          state_nb_run[j * 2 + 1][k * 2 + 1][n] * state_in[i - k][j * 2 + 1][m - n];
+                }
+
+                if (i < max_length - 1) {
+                  state_out[j * 2][m] += occupancy->mass[k] * sum0;
+                  state_out[j * 2 + 1][m] += occupancy->mass[k] * sum1;
+                }
+                if (*lmass > 0.) {
+                  *pmass += *lmass * (1. - occupancy->cumul[k - 1]) * (sum0 + sum1);
+                }
+              }
+            }
+
+            else {
+
+              // the sojourn started at the beginning of the sequence
+
+              sum0 = state_nb_run[j * 2][k * 2][m] * initial[j];
+              sum1 = state_nb_run[j * 2][k * 2 + 1][m] * initial[j];
+
+              if (i < max_length - 1) {
+                switch (type) {
+                case ORDINARY :
+                  state_out[j * 2][m] += occupancy->mass[k] * sum0;
+                  state_out[j * 2 + 1][m] += occupancy->mass[k] * sum1;
+                  break;
+                case EQUILIBRIUM :
+                  state_out[j * 2][m] += forward[j]->mass[k] * sum0;
+                  state_out[j * 2 + 1][m] += forward[j]->mass[k] * sum1;
+                  break;
+                }
+              }
+
+              if (*lmass > 0.) {
+                switch (type) {
+                case ORDINARY :
+                  *pmass += *lmass * (1. - occupancy->cumul[k - 1]) * (sum0 + sum1);
+                  break;
+                case EQUILIBRIUM :
+                  *pmass += *lmass * (1. - forward[j]->cumul[k - 1]) * (sum0 + sum1);
+                  break;
+                }
+              }
+            }
+
+            if (*lmass > 0.) {
+              pmass++;
+            }
+          }
+        }
+        break;
+      }
+
+      // case Markovian state
+
+      case MARKOVIAN : {
+        if (*lmass > 0.) {
+          pmass = nb_run->mass;
+        }
+
+        if (i == 0) {
+          state_out[j * 2][0] = (1. - categorical_process[variable]->observation[j]->mass[output]) * initial[j];
+          state_out[j * 2 + 1][1] = categorical_process[variable]->observation[j]->mass[output] * initial[j];
+
+          if (*lmass > 0.) {
+            *pmass++ += *lmass * state_out[j * 2][0];
+            *pmass += *lmass * state_out[j * 2 + 1][1];
+          }
+        }
+
+        else {
+          for (k = 0;k <= index_nb_pattern;k++) {
+            sum0 = 0.;
+            if ((k < index_nb_pattern) || (i % 2 == 1)) {
+              state_out[j * 2][k] = (1. - categorical_process[variable]->observation[j]->mass[output]) *
+                                    (state_in[i - 1][j * 2][k] + state_in[i - 1][j * 2 + 1][k]);
+              sum0 += state_in[i - 1][j * 2 + 1][k];
+            }
+            if (k > 0) {
+              sum0 += state_in[i - 1][j * 2][k - 1];
+            }
+            state_out[j * 2 + 1][k] = categorical_process[variable]->observation[j]->mass[output] * sum0;
+
+            if (*lmass > 0.) {
+              *pmass++ += *lmass * (state_out[j * 2][k] + state_out[j * 2 + 1][k]);
+            }
+          }
+        }
+        break;
+      }
+      }
+    }
+
+    // propagation through the transition matrix; the "last observation" flag is kept
+
+    if (i < max_length - 1) {
+      for (j = 0;j < nb_state;j++) {
+        for (k = 0;k <= index_nb_pattern;k++) {
+          state_in[i][j * 2][k] = 0.;
+          state_in[i][j * 2 + 1][k] = 0.;
+          for (m = 0;m < nb_state;m++) {
+            state_in[i][j * 2][k] += transition[m][j] * state_out[m * 2][k];
+            state_in[i][j * 2 + 1][k] += transition[m][j] * state_out[m * 2 + 1][k];
+          }
+        }
+      }
+    }
+
+    if (i % 2 == 1) {
+      index_nb_pattern++;
+    }
+  }
+
+  // renormalization of the mixture of the distributions of the number of runs of
+  // the selected observation for taking account of the thresholds applied on
+  // the cumulative state occupancy distribution functions
+
+  pmass = nb_run->mass;
+  sum0 = 0.;
+  for (i = 0;i < nb_run->nb_value;i++) {
+    sum0 += *pmass++;
+  }
+
+  // a null total mass is left untouched: dividing by it would turn every mass into NaN
+
+  if ((sum0 > 0.) && (sum0 < 1.)) {
+    pmass = nb_run->mass;
+    for (i = 0;i < nb_run->nb_value;i++) {
+      *pmass++ /= sum0;
+    }
+  }
+
+  nb_run->nb_value_computation();
+  nb_run->offset_computation();
+  nb_run->cumul_computation();
+
+  nb_run->max_computation();
+  nb_run->mean_computation();
+  nb_run->variance_computation();
+
+  // release of the working arrays (entries 0 and 1 of state_nb_run[i] were never allocated)
+
+  for (i = 0;i < nb_state * 2;i++) {
+    if (sojourn_type[i / 2] == SEMI_MARKOVIAN) {
+      occupancy = state_process->sojourn_time[i / 2];
+      for (j = 1;j < MIN(max_length + 1 , occupancy->nb_value);j++) {
+        delete [] state_nb_run[i][j * 2];
+        delete [] state_nb_run[i][j * 2 + 1];
+      }
+      delete [] state_nb_run[i];
+    }
+  }
+  delete [] state_nb_run;
+
+  for (i = 0;i < nb_state * 2;i++) {
+    delete [] state_out[i];
+  }
+  delete [] state_out;
+
+  for (i = 0;i < max_length - 1;i++) {
+    for (j = 0;j < nb_state * 2;j++) {
+      delete [] state_in[i][j];
+    }
+    delete [] state_in[i];
+  }
+  delete [] state_in;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the mixture of the distributions of the number of occurrences of
+ *         a categorical observation for a sequence length mixing distribution and
+ *         a hidden semi-Markov chain.
+ *
+ *  \param[in] variable observation process index,
+ *  \param[in] output   observation.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkov::output_nb_occurrence_mixture(int variable , int output)
+
+{
+  int i , j , k , m , n;
+  int max_length , min , max;
+  double sum , *pmass , *omass , *lmass , **state_out , ***state_in;
+  Distribution *nb_occurrence;
+  DiscreteParametric *occupancy , ***observation;
+
+
+  // the result is accumulated in place in the pre-allocated distribution, which is first reset
+
+  nb_occurrence = categorical_process[variable]->nb_occurrence[output];
+
+  pmass = nb_occurrence->mass;
+  for (i = 0;i < nb_occurrence->nb_value;i++) {
+    *pmass++ = 0.;
+  }
+
+  max_length = categorical_process[variable]->length->nb_value - 1;
+
+  state_out = new double*[nb_state];
+  for (i = 0;i < nb_state;i++) {
+    state_out[i] = new double[max_length + 1];
+  }
+
+  // at time i, between 0 and i + 1 occurrences are possible
+
+  state_in = new double**[max_length - 1];
+  for (i = 0;i < max_length - 1;i++) {
+    state_in[i] = new double*[nb_state];
+    for (j = 0;j < nb_state;j++) {
+      state_in[i][j] = new double[i + 2];
+    }
+  }
+
+  // computation of the distributions of the number of occurrences of the selected observation
+  // for the different times spent in a state
+
+  observation = new DiscreteParametric**[nb_state];
+  for (i = 0;i < nb_state;i++) {
+    if (sojourn_type[i] == SEMI_MARKOVIAN) {
+      occupancy = state_process->sojourn_time[i];
+      observation[i] = new DiscreteParametric*[MIN(max_length + 1 , occupancy->nb_value)];
+
+      observation[i][0] = NULL;
+      for (j = 1;j < MIN(max_length + 1 , occupancy->nb_value);j++) {
+        observation[i][j] = new DiscreteParametric(BINOMIAL , 0 , j , D_DEFAULT ,
+                                                   categorical_process[variable]->observation[i]->mass[output]);
+      }
+    }
+  }
+
+  // computation of the probabilities of leaving (semi-Markov) / of being in (Markov) a state as
+  // function of the number of occurrences of the selected observation
+
+  lmass = categorical_process[variable]->length->mass;
+
+  for (i = 0;i < max_length;i++) {
+    lmass++;
+
+    for (j = 0;j < nb_state;j++) {
+
+      // initialization of the probabilities of leaving a state at time i
+
+      for (k = 0;k <= i + 1;k++) {
+        state_out[j][k] = 0.;
+      }
+
+      switch (sojourn_type[j]) {
+
+      // case semi-Markovian state
+
+      case SEMI_MARKOVIAN : {
+        occupancy = state_process->sojourn_time[j];
+
+        for (k = (*lmass > 0. ? 1 : occupancy->offset);k <= MIN(i + 1 , occupancy->nb_value - 1);k++) {
+          if (*lmass > 0.) {
+            pmass = nb_occurrence->mass;
+          }
+
+          for (m = 0;m <= i + 1;m++) {
+            if (k < i + 1) {
+              min = MAX(m - (i - k + 1) , 0);
+              max = MIN(k , m);
+
+              if (max >= min) {
+                omass = observation[j][k]->mass + min;
+                sum = 0.;
+                for (n = min;n <= max;n++) {
+                  sum += *omass++ * state_in[i - k][j][m - n];
+                }
+
+                if (i < max_length - 1) {
+                  state_out[j][m] += occupancy->mass[k] * sum;
+                }
+                if (*lmass > 0.) {
+                  *pmass += *lmass * (1. - occupancy->cumul[k - 1]) * sum;
+                }
+              }
+            }
+
+            else {
+
+              // the sojourn started at the beginning of the sequence
+
+              sum = observation[j][k]->mass[m] * initial[j];
+
+              if (i < max_length - 1) {
+                switch (type) {
+                case ORDINARY :
+                  state_out[j][m] += occupancy->mass[k] * sum;
+                  break;
+                case EQUILIBRIUM :
+                  state_out[j][m] += forward[j]->mass[k] * sum;
+                  break;
+                }
+              }
+
+              if (*lmass > 0.) {
+                switch (type) {
+                case ORDINARY :
+                  *pmass += *lmass * (1. - occupancy->cumul[k - 1]) * sum;
+                  break;
+                case EQUILIBRIUM :
+                  *pmass += *lmass * (1. - forward[j]->cumul[k - 1]) * sum;
+                  break;
+                }
+              }
+            }
+
+            if (*lmass > 0.) {
+              pmass++;
+            }
+          }
+        }
+        break;
+      }
+
+      // case Markovian state
+
+      case MARKOVIAN : {
+        if (*lmass > 0.) {
+          pmass = nb_occurrence->mass;
+        }
+
+        if (i == 0) {
+          state_out[j][0] = (1. - categorical_process[variable]->observation[j]->mass[output]) * initial[j];
+          state_out[j][1] = categorical_process[variable]->observation[j]->mass[output] * initial[j];
+
+          if (*lmass > 0.) {
+            *pmass++ += *lmass * state_out[j][0];
+            *pmass += *lmass * state_out[j][1];
+          }
+        }
+
+        else {
+          for (k = 0;k <= i + 1;k++) {
+            if (k < i + 1) {
+              state_out[j][k] += (1. - categorical_process[variable]->observation[j]->mass[output]) *
+                                 state_in[i - 1][j][k];
+            }
+            if (k > 0) {
+              state_out[j][k] += categorical_process[variable]->observation[j]->mass[output] *
+                                 state_in[i - 1][j][k - 1];
+            }
+
+            if (*lmass > 0.) {
+              *pmass++ += *lmass * state_out[j][k];
+            }
+          }
+        }
+        break;
+      }
+      }
+    }
+
+    // propagation through the transition matrix
+
+    if (i < max_length - 1) {
+      for (j = 0;j < nb_state;j++) {
+        for (k = 0;k <= i + 1;k++) {
+          state_in[i][j][k] = 0.;
+          for (m = 0;m < nb_state;m++) {
+            state_in[i][j][k] += transition[m][j] * state_out[m][k];
+          }
+        }
+      }
+    }
+  }
+
+  // renormalization of the mixture of the distributions of the number of occurrences of
+  // the selected observation for taking account of the thresholds applied on
+  // the cumulative state occupancy distribution functions
+
+  pmass = nb_occurrence->mass;
+  sum = 0.;
+  for (i = 0;i < nb_occurrence->nb_value;i++) {
+    sum += *pmass++;
+  }
+
+  // a null total mass is left untouched: dividing by it would turn every mass into NaN
+
+  if ((sum > 0.) && (sum < 1.)) {
+    pmass = nb_occurrence->mass;
+    for (i = 0;i < nb_occurrence->nb_value;i++) {
+      *pmass++ /= sum;
+    }
+  }
+
+  nb_occurrence->nb_value_computation();
+  nb_occurrence->offset_computation();
+  nb_occurrence->cumul_computation();
+
+  nb_occurrence->max_computation();
+  nb_occurrence->mean_computation();
+  nb_occurrence->variance_computation();
+
+  // release of the working arrays (entry 0 of observation[i] was never allocated)
+
+  for (i = 0;i < nb_state;i++) {
+    if (sojourn_type[i] == SEMI_MARKOVIAN) {
+      for (j = 1;j < MIN(max_length + 1 , state_process->sojourn_time[i]->nb_value);j++) {
+        delete observation[i][j];
+      }
+      delete [] observation[i];
+    }
+  }
+  delete [] observation;
+
+  for (i = 0;i < nb_state;i++) {
+    delete [] state_out[i];
+  }
+  delete [] state_out;
+
+  for (i = 0;i < max_length - 1;i++) {
+    for (j = 0;j < nb_state;j++) {
+      delete [] state_in[i][j];
+    }
+    delete [] state_in[i];
+  }
+  delete [] state_in;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the characteristic distributions of a SemiMarkov object.
+ *
+ *  \param[in] length        sequence length,
+ *  \param[in] counting_flag flag on the computation of the counting distributions,
+ *  \param[in] variable      observation process index.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkov::characteristic_computation(int length , bool counting_flag , int variable)
+
+{
+  if (nb_component > 0) {
+    bool computation[NB_OUTPUT_PROCESS + 1];
+    int i , j , k;
+    double *memory;
+    DiscreteParametric dlength(UNIFORM , length , length , D_DEFAULT , D_DEFAULT);
+
+
+    memory = NULL;
+
+    // computation of the state intensity and interval distributions
+    // (skipped when they were already computed for the same sequence length distribution)
+
+    if (((variable == I_DEFAULT) || (variable == 0)) &&
+        ((!(state_process->length)) ||
+         (dlength != *(state_process->length)))) {
+      computation[0] = true;
+      state_process->create_characteristic(dlength , false , counting_flag);
+
+      index_state_distribution();
+
+      for (i = 0;i < nb_state;i++) {
+        if (type == ORDINARY) {
+          state_no_occurrence_probability(i);
+        }
+        state_first_occurrence_distribution(i);
+
+        if (type == ORDINARY) {
+          state_leave_probability(i);
+        }
+        if (state_process->leave[i] < 1. - DOUBLE_ERROR) {
+          state_recurrence_time_distribution(i);
+        }
+        else {
+          delete state_process->recurrence_time[i];
+          state_process->recurrence_time[i] = NULL;
+        }
+
+        // Markovian state: the sojourn time distribution is deduced from the self-transition probability
+
+        if ((sojourn_type[i] == MARKOVIAN) && (transition[i][i] < 1.)) {
+          if (transition[i][i] > 0.) {
+            state_process->sojourn_time[i] = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 ,
+                                                                    I_DEFAULT , 1. , 1. - transition[i][i] ,
+                                                                    OCCUPANCY_THRESHOLD);
+            state_process->sojourn_time[i]->parameter = D_DEFAULT;
+            state_process->sojourn_time[i]->probability = D_DEFAULT;
+          }
+
+          else {
+            state_process->sojourn_time[i] = new DiscreteParametric(UNIFORM , 1 , 1 ,
+                                                                    D_DEFAULT , D_DEFAULT);
+            state_process->sojourn_time[i]->sup_bound = I_DEFAULT;
+          }
+
+          state_process->sojourn_time[i]->ident = CATEGORICAL;
+          state_process->sojourn_time[i]->inf_bound = I_DEFAULT;
+        }
+      }
+
+#     ifdef MESSAGE
+      if (type == EQUILIBRIUM) {
+        double sum = 0.;
+
+        // computation of the stationary distribution in the case of an equilibrium process
+        // with renormalization for taking account of the thresholds applied on
+        // the cumulative distribution functions of the recurrence times in states
+
+        // a recurrence time distribution may have been deleted in the loop above
+
+        for (i = 0;i < nb_state;i++) {
+          if (state_process->recurrence_time[i]) {
+            sum += 1. / state_process->recurrence_time[i]->mean;
+          }
+        }
+
+        cout << "\n" << STAT_label[STATL_STATIONARY_PROBABILITIES] << endl;
+        for (i = 0;i < nb_state;i++) {
+          cout << initial[i] << " | ";
+          if (state_process->recurrence_time[i]) {
+            cout << 1. / (state_process->recurrence_time[i]->mean * sum);
+          }
+          else {
+            cout << 0.;
+          }
+          cout << endl;
+        }
+      }
+#     endif
+
+    }
+
+    else {
+      computation[0] = false;
+    }
+
+    // computation of the observation intensity and interval distributions
+
+    for (i = 0;i < nb_output_process;i++) {
+      if ((categorical_process[i]) && ((variable == I_DEFAULT) || (i == variable)) &&
+          ((!(categorical_process[i]->length)) ||
+           (dlength != *(categorical_process[i]->length)))) {
+        computation[i + 1] = true;
+        categorical_process[i]->create_characteristic(dlength , true , counting_flag);
+
+        index_output_distribution(i);
+
+        // computed once and shared by all the observation processes
+
+        if (!memory) {
+          memory = memory_computation();
+        }
+
+        for (j = 0;j < categorical_process[i]->nb_value;j++) {
+          if (type == ORDINARY) {
+            output_no_occurrence_probability(i , j);
+          }
+          if (categorical_process[i]->no_occurrence[j] < 1. - DOUBLE_ERROR) {
+            output_first_occurrence_distribution(i , j);
+          }
+          else {
+            delete categorical_process[i]->first_occurrence[j];
+            categorical_process[i]->first_occurrence[j] = NULL;
+            categorical_process[i]->leave[j] = 1.;
+          }
+
+          if ((type == ORDINARY) && (categorical_process[i]->first_occurrence[j])) {
+            output_leave_probability(memory , i , j);
+          }
+          if (categorical_process[i]->leave[j] < 1. - DOUBLE_ERROR) {
+            output_recurrence_time_distribution(memory , i , j);
+          }
+          else {
+            delete categorical_process[i]->recurrence_time[j];
+            categorical_process[i]->recurrence_time[j] = NULL;
+          }
+
+          // search for a state that can emit the observation without being an absorbing state
+          // emitting it with probability 1
+
+          for (k = 0;k < nb_state;k++) {
+            if ((categorical_process[i]->observation[k]->mass[j] > 0.) &&
+                ((stype[k] != ABSORBING) || (categorical_process[i]->observation[k]->mass[j] < 1.))) {
+              break;
+            }
+          }
+
+          if (k < nb_state) {
+            output_sojourn_time_distribution(memory , i , j);
+          }
+          else {
+            categorical_process[i]->absorption[j] = 1.;
+            delete categorical_process[i]->sojourn_time[j];
+            categorical_process[i]->sojourn_time[j] = NULL;
+          }
+        }
+      }
+
+      else {
+        computation[i + 1] = false;
+      }
+    }
+
+    delete [] memory;
+
+    if (counting_flag) {
+
+      // computation of the state counting distributions
+
+      if (computation[0]) {
+        for (i = 0;i < nb_state;i++) {
+          state_nb_pattern_mixture(i , RUN);
+          state_nb_pattern_mixture(i , OCCURRENCE);
+        }
+      }
+
+      // computation of the observation counting distributions
+
+      for (i = 0;i < nb_output_process;i++) {
+        if (computation[i + 1]) {
+          for (j = 0;j < categorical_process[i]->nb_value;j++) {
+            output_nb_run_mixture(i , j);
+            output_nb_occurrence_mixture(i , j);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the characteristic distributions of a SemiMarkov object.
+ *
+ *  \param[in] seq           reference on a SemiMarkovData object,
+ *  \param[in] counting_flag flag on the computation of the counting distributions,
+ *  \param[in] variable      observation process index,
+ *  \param[in] length_flag   flag on the sequence length.
+ */
+/*--------------------------------------------------------------*/
+
+void SemiMarkov::characteristic_computation(const SemiMarkovData &seq , bool counting_flag ,
+                                            int variable , bool length_flag)
+
+{
+  if (nb_component > 0) {
+    bool computation[NB_OUTPUT_PROCESS + 1];
+    int i , j , k;
+    int seq_variable;
+    double *memory;
+    Distribution dlength(*(seq.length_distribution));
+
+
+    memory = NULL;
+
+    // computation of the state intensity and interval distributions
+    // (when length_flag is set, skipped if already computed for the same sequence length distribution)
+
+    if (((variable == I_DEFAULT) || (variable == 0)) &&
+        ((!length_flag) || (!(state_process->length)) ||
+         (dlength != *(state_process->length)))) {
+      computation[0] = true;
+      state_process->create_characteristic(dlength , false , counting_flag);
+
+      index_state_distribution();
+
+      // when the first variable of the sample is the state variable, the support of each
+      // sample distribution is passed as extra argument to the corresponding computation
+
+      for (i = 0;i < nb_state;i++) {
+        if (type == ORDINARY) {
+          state_no_occurrence_probability(i);
+        }
+        if (seq.type[0] == STATE) {
+          state_first_occurrence_distribution(i , ((seq.characteristics[0]) && (i < seq.marginal_distribution[0]->nb_value) && (seq.characteristics[0]->first_occurrence[i]) && (seq.characteristics[0]->first_occurrence[i]->nb_element > 0) ? seq.characteristics[0]->first_occurrence[i]->nb_value : 1));
+        }
+        else {
+          state_first_occurrence_distribution(i);
+        }
+
+        if (type == ORDINARY) {
+          state_leave_probability(i);
+        }
+        if (state_process->leave[i] < 1. - DOUBLE_ERROR) {
+          if (seq.type[0] == STATE) {
+            state_recurrence_time_distribution(i , ((seq.characteristics[0]) && (i < seq.marginal_distribution[0]->nb_value) && (seq.characteristics[0]->recurrence_time[i]) && (seq.characteristics[0]->recurrence_time[i]->nb_element > 0) ? seq.characteristics[0]->recurrence_time[i]->nb_value : 1));
+          }
+          else {
+            state_recurrence_time_distribution(i);
+          }
+        }
+        else {
+          delete state_process->recurrence_time[i];
+          state_process->recurrence_time[i] = NULL;
+        }
+
+        // Markovian state: the sojourn time distribution is deduced from the self-transition probability
+
+        if ((sojourn_type[i] == MARKOVIAN) && (transition[i][i] < 1.)) {
+          if (transition[i][i] > 0.) {
+            state_process->sojourn_time[i] = new DiscreteParametric(NEGATIVE_BINOMIAL , 1 ,
+                                                                    I_DEFAULT , 1. , 1. - transition[i][i] ,
+                                                                    OCCUPANCY_THRESHOLD);
+
+            // extension of the support up to the support of the sample sojourn time distribution
+
+            if ((seq.type[0] == STATE) && (seq.characteristics[0]) && (i < seq.marginal_distribution[0]->nb_value) &&
+                (seq.characteristics[0]->sojourn_time[i]) &&
+                (seq.characteristics[0]->sojourn_time[i]->nb_value > state_process->sojourn_time[i]->nb_value)) {
+              state_process->sojourn_time[i]->computation(seq.characteristics[0]->sojourn_time[i]->nb_value , OCCUPANCY_THRESHOLD);
+            }
+            state_process->sojourn_time[i]->parameter = D_DEFAULT;
+            state_process->sojourn_time[i]->probability = D_DEFAULT;
+          }
+
+          else {
+            state_process->sojourn_time[i] = new DiscreteParametric(UNIFORM , 1 , 1 ,
+                                                                    D_DEFAULT , D_DEFAULT);
+            state_process->sojourn_time[i]->sup_bound = I_DEFAULT;
+          }
+
+          state_process->sojourn_time[i]->ident = CATEGORICAL;
+          state_process->sojourn_time[i]->inf_bound = I_DEFAULT;
+        }
+      }
+
+#     ifdef MESSAGE
+      if (type == EQUILIBRIUM) {
+        double sum = 0.;
+
+        // computation of the stationary distribution in the case of an equilibrium process
+        // with renormalization for taking account of the thresholds applied on
+        // the cumulative distribution functions of the recurrence times in states
+
+        // a recurrence time distribution may have been deleted in the loop above
+
+        for (i = 0;i < nb_state;i++) {
+          if (state_process->recurrence_time[i]) {
+            sum += 1. / state_process->recurrence_time[i]->mean;
+          }
+        }
+
+        cout << "\n" << STAT_label[STATL_STATIONARY_PROBABILITIES] << endl;
+        for (i = 0;i < nb_state;i++) {
+          cout << initial[i] << " | ";
+          if (state_process->recurrence_time[i]) {
+            cout << 1. / (state_process->recurrence_time[i]->mean * sum);
+          }
+          else {
+            cout << 0.;
+          }
+          cout << endl;
+        }
+      }
+#     endif
+
+    }
+
+    else {
+      computation[0] = false;
+    }
+
+    // computation of the observation intensity and interval distributions
+
+    for (i = 0;i < nb_output_process;i++) {
+      if ((categorical_process[i]) && ((variable == I_DEFAULT) || (i == variable)) &&
+          ((!length_flag) || (!(categorical_process[i]->length)) ||
+           (dlength != *(categorical_process[i]->length)))) {
+        computation[i + 1] = true;
+        categorical_process[i]->create_characteristic(dlength , true , counting_flag);
+
+        // the sample variables are shifted by one when the first one is the state variable
+
+        switch (seq.type[0]) {
+        case STATE :
+          seq_variable = i + 1;
+          break;
+        default :
+          seq_variable = i;
+          break;
+        }
+
+        index_output_distribution(i);
+
+        // computed once and shared by all the observation processes
+
+        if (!memory) {
+          memory = memory_computation();
+        }
+
+        for (j = 0;j < categorical_process[i]->nb_value;j++) {
+          if (type == ORDINARY) {
+            output_no_occurrence_probability(i , j);
+          }
+          if (categorical_process[i]->no_occurrence[j] < 1. - DOUBLE_ERROR) {
+            output_first_occurrence_distribution(i , j , ((seq.characteristics[seq_variable]) && (j < seq.characteristics[seq_variable]->nb_value) && (seq.characteristics[seq_variable]->first_occurrence[j]) && (seq.characteristics[seq_variable]->first_occurrence[j]->nb_element > 0) ? seq.characteristics[seq_variable]->first_occurrence[j]->nb_value : 1));
+          }
+          else {
+            delete categorical_process[i]->first_occurrence[j];
+            categorical_process[i]->first_occurrence[j] = NULL;
+            categorical_process[i]->leave[j] = 1.;
+          }
+
+          if ((type == ORDINARY) && (categorical_process[i]->first_occurrence[j])) {
+            output_leave_probability(memory , i , j);
+          }
+          if (categorical_process[i]->leave[j] < 1. - DOUBLE_ERROR) {
+            output_recurrence_time_distribution(memory , i , j , ((seq.characteristics[seq_variable]) && (j < seq.characteristics[seq_variable]->nb_value) && (seq.characteristics[seq_variable]->recurrence_time[j]) && (seq.characteristics[seq_variable]->recurrence_time[j]->nb_element > 0) ? seq.characteristics[seq_variable]->recurrence_time[j]->nb_value : 1));
+          }
+          else {
+            delete categorical_process[i]->recurrence_time[j];
+            categorical_process[i]->recurrence_time[j] = NULL;
+          }
+
+          // search for a state that can emit the observation without being an absorbing state
+          // emitting it with probability 1
+
+          for (k = 0;k < nb_state;k++) {
+            if ((categorical_process[i]->observation[k]->mass[j] > 0.) &&
+                ((stype[k] != ABSORBING) || (categorical_process[i]->observation[k]->mass[j] < 1.))) {
+              break;
+            }
+          }
+
+          if (k < nb_state) {
+            output_sojourn_time_distribution(memory , i , j , ((seq.characteristics[seq_variable]) && (j < seq.characteristics[seq_variable]->nb_value) && (seq.characteristics[seq_variable]->sojourn_time[j]) && (seq.characteristics[seq_variable]->sojourn_time[j]->nb_element > 0) ? seq.characteristics[seq_variable]->sojourn_time[j]->nb_value : 1));
+          }
+          else {
+            categorical_process[i]->absorption[j] = 1.;
+            delete categorical_process[i]->sojourn_time[j];
+            categorical_process[i]->sojourn_time[j] = NULL;
+          }
+        }
+      }
+
+      else {
+        computation[i + 1] = false;
+      }
+    }
+
+    delete [] memory;
+
+    if (counting_flag) {
+
+      // computation of the state counting distributions
+
+      if (computation[0]) {
+        for (i = 0;i < nb_state;i++) {
+          state_nb_pattern_mixture(i , RUN);
+          state_nb_pattern_mixture(i , OCCURRENCE);
+        }
+      }
+
+      // computation of the observation counting distributions
+
+      for (i = 0;i < nb_output_process;i++) {
+        if (computation[i + 1]) {
+          for (j = 0;j < categorical_process[i]->nb_value;j++) {
+            output_nb_run_mixture(i , j);
+            output_nb_occurrence_mixture(i , j);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+};  // namespace sequence_analysis
diff --git a/src/cpp/sequence_analysis/time_events.cpp b/src/cpp/sequence_analysis/time_events.cpp
new file mode 100644
index 0000000..c3a0637
--- /dev/null
+++ b/src/cpp/sequence_analysis/time_events.cpp
@@ -0,0 +1,4072 @@
+/* -*-c++-*-
+ *  ----------------------------------------------------------------------------
+ *
+ *       StructureAnalysis: Identifying patterns in plant architecture and development
+ *
+ *       Copyright 1995-2019 CIRAD AGAP
+ *
+ *       File author(s): Yann Guedon
(yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "renewal.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the observation period frequency distribution and + * the number of events frequency distributions on the basis of + * triplets {observation period, number of events, frequency}. 
+ */
+/*--------------------------------------------------------------*/
+
+void TimeEvents::build_frequency_distribution()
+
+{
+  int i;
+  int max_nb_event , last_time;
+
+
+  // both structures are sized from the observation period of the last class
+  // (the classes are presumably sorted by increasing observation period - to be confirmed against the callers)
+
+  last_time = time[nb_class - 1];
+
+  htime = new FrequencyDistribution(last_time + 1);
+
+  hnb_event = new FrequencyDistribution*[last_time + 1];
+  for (i = 0;i <= last_time;i++) {
+    hnb_event[i] = NULL;
+  }
+
+  // a number of events frequency distribution is created on the last class of
+  // each observation period, sized from the number of events of this class
+
+  max_nb_event = 0;
+
+  for (i = 0;i < nb_class;i++) {
+    if ((i == nb_class - 1) || (time[i + 1] != time[i])) {
+      hnb_event[time[i]] = new FrequencyDistribution(nb_event[i] + 1);
+      if (nb_event[i] > max_nb_event) {
+        max_nb_event = nb_event[i];
+      }
+    }
+
+    htime->frequency[time[i]] += frequency[i];
+  }
+
+  mixture = new FrequencyDistribution(max_nb_event + 1);
+
+  htime->offset_computation();
+  htime->nb_element = nb_element;
+  htime->max_computation();
+  htime->mean_computation();
+  htime->variance_computation();
+
+  // accumulation of the class frequencies in the number of events frequency distributions
+
+  for (i = 0;i < nb_class;i++) {
+    hnb_event[time[i]]->frequency[nb_event[i]] += frequency[i];
+    mixture->frequency[nb_event[i]] += frequency[i];
+  }
+
+  for (i = htime->offset;i < htime->nb_value;i++) {
+    if (htime->frequency[i] != 0) {
+      hnb_event[i]->offset_computation();
+      hnb_event[i]->nb_element_computation();
+      hnb_event[i]->max_computation();
+      hnb_event[i]->mean_computation();
+      hnb_event[i]->variance_computation();
+    }
+  }
+
+  mixture->offset_computation();
+  mixture->nb_element = nb_element;
+  mixture->max_computation();
+  mixture->mean_computation();
+  mixture->variance_computation();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of the triplets {observation period, number of events, frequency}
+ *         from the observation period frequency distribution and
+ *         the number of events frequency distributions.
+ */
+/*--------------------------------------------------------------*/
+
+void TimeEvents::build_sample()
+
+{
+  int i , j;
+  int class_index;
+
+
+  // first pass: one class per {observation period, number of events} with non-zero frequency
+
+  nb_class = 0;
+
+  for (i = htime->offset;i < htime->nb_value;i++) {
+    if (htime->frequency[i] > 0) {
+      for (j = hnb_event[i]->offset;j < hnb_event[i]->nb_value;j++) {
+        if (hnb_event[i]->frequency[j] > 0) {
+          nb_class++;
+        }
+      }
+    }
+  }
+
+  time = new int[nb_class];
+  nb_event = new int[nb_class];
+  frequency = new int[nb_class];
+
+  // second pass: the triplets are stored in the same scanning order
+
+  class_index = 0;
+
+  for (i = htime->offset;i < htime->nb_value;i++) {
+    if (htime->frequency[i] > 0) {
+      for (j = hnb_event[i]->offset;j < hnb_event[i]->nb_value;j++) {
+        if (hnb_event[i]->frequency[j] > 0) {
+          time[class_index] = i;
+          nb_event[class_index] = j;
+          frequency[class_index] = hnb_event[i]->frequency[j];
+          class_index++;
+        }
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a TimeEvents object from pairs {observation period, number of events}.
+ *
+ *  \param[in] inb_element number of pairs {observation period, number of events},
+ *  \param[in] itime       pointer on the observation periods,
+ *  \param[in] inb_event   pointer on the numbers of events.
+ */
+/*--------------------------------------------------------------*/
+
+void TimeEvents::build(int inb_element , int *itime , int *inb_event)
+
+{
+  int j , k;
+  int current_time , lowest_time , highest_time , current_nb_event , lowest_nb_event ,
+      highest_nb_event , nb_selected , nb_processed , nb_sorted , count , *selected_nb_event;
+
+
+  nb_element = inb_element;
+
+  // range of the observation periods and of the numbers of events
+
+  highest_time = 1;
+  highest_nb_event = 0;
+
+  for (j = 0;j < nb_element;j++) {
+    if (itime[j] > highest_time) {
+      highest_time = itime[j];
+    }
+    if (inb_event[j] > highest_nb_event) {
+      highest_nb_event = inb_event[j];
+    }
+  }
+
+  lowest_time = highest_time;
+  lowest_nb_event = highest_nb_event;
+
+  for (j = 0;j < nb_element;j++) {
+    if (itime[j] < lowest_time) {
+      lowest_time = itime[j];
+    }
+    if (inb_event[j] < lowest_nb_event) {
+      lowest_nb_event = inb_event[j];
+    }
+  }
+
+  // upper bound on the number of classes used for sizing the arrays
+
+  nb_class = MIN(nb_element , (highest_time - lowest_time + 1) *
+                 (highest_nb_event - lowest_nb_event + 1));
+  time = new int[nb_class];
+  nb_event = new int[nb_class];
+  frequency = new int[nb_class];
+
+  selected_nb_event = new int[nb_element];
+
+  // the pairs are grouped in classes ordered by increasing observation period
+  // then by increasing number of events
+
+  current_time = 0;
+  nb_class = 0;
+  nb_processed = 0;
+
+  do {
+
+    // smallest observation period strictly greater than the previous one
+
+    lowest_time = highest_time;
+    for (j = 0;j < nb_element;j++) {
+      if ((itime[j] > current_time) && (itime[j] < lowest_time)) {
+        lowest_time = itime[j];
+      }
+    }
+    current_time = lowest_time;
+
+    // numbers of events observed for this observation period
+
+    nb_selected = 0;
+    for (j = 0;j < nb_element;j++) {
+      if (itime[j] == current_time) {
+        selected_nb_event[nb_selected++] = inb_event[j];
+      }
+    }
+
+    current_nb_event = -1;
+    nb_sorted = 0;
+
+    do {
+
+      // smallest number of events strictly greater than the previous one
+
+      lowest_nb_event = highest_nb_event;
+      for (k = 0;k < nb_selected;k++) {
+        if ((selected_nb_event[k] > current_nb_event) && (selected_nb_event[k] < lowest_nb_event)) {
+          lowest_nb_event = selected_nb_event[k];
+        }
+      }
+      current_nb_event = lowest_nb_event;
+
+      // new triplet {observation period, number of events, frequency}
+
+      count = 0;
+      for (k = 0;k < nb_selected;k++) {
+        if (selected_nb_event[k] == current_nb_event) {
+          count++;
+        }
+      }
+
+      time[nb_class] = current_time;
+      nb_event[nb_class] = current_nb_event;
+      frequency[nb_class] = count;
+      nb_class++;
+
+      nb_sorted += count;
+      nb_processed += count;
+    }
+    while (nb_sorted < nb_selected);
+
+  }
+  while (nb_processed < nb_element);
+
+  delete [] selected_nb_event;
+
+  build_frequency_distribution();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the TimeEvents class: allocation of zero-initialized classes.
+ *
+ *  \param[in] inb_class number of classes corresponding to fixed {observation period, number of events}.
+ */
+/*--------------------------------------------------------------*/
+
+TimeEvents::TimeEvents(int inb_class)
+
+{
+  nb_element = 0;
+  nb_class = inb_class;
+
+  time = NULL;
+  nb_event = NULL;
+  frequency = NULL;
+
+  if (nb_class != 0) {
+    int i;
+
+    time = new int[nb_class];
+    nb_event = new int[nb_class];
+    frequency = new int[nb_class];
+
+    for (i = 0;i < nb_class;i++) {
+      time[i] = 0;
+      nb_event[i] = 0;
+      frequency[i] = 0;
+    }
+  }
+
+  // the frequency distributions are not built by this constructor
+
+  htime = NULL;
+  hnb_event = NULL;
+  mixture = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a TimeEvents object from a FrequencyDistribution object.
+ *
+ *  \param[in] inb_event reference on a FrequencyDistribution object,
+ *  \param[in] itime     observation period.
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents::TimeEvents(FrequencyDistribution &inb_event, int itime) + +{ + int i; + int *ptime , *pnb_event , *pfrequency; + + + nb_class = 0; + for (i = inb_event.offset;i < inb_event.nb_value;i++) { + if (inb_event.frequency[i] > 0) { + nb_class++; + } + } + + time = new int[nb_class]; + nb_event = new int[nb_class]; + frequency = new int[nb_class]; + + nb_element = inb_event.nb_element; + + // constitution of the triplets {observation period, number of events, frequency} + + ptime = time; + pnb_event = nb_event; + pfrequency = frequency; + + for (i = inb_event.offset;i < inb_event.nb_value;i++) { + if (inb_event.frequency[i] > 0) { + *ptime++ = itime; + *pnb_event++ = i; + *pfrequency++ = inb_event.frequency[i]; + } + } + + // construction of the observation period frequency distribution and + // the number of events frequency distributions + + htime = new FrequencyDistribution(itime + 1); + + htime->frequency[itime] = inb_event.nb_element; + htime->offset = itime; + htime->nb_element = inb_event.nb_element; + htime->max = inb_event.nb_element; + htime->mean = itime; + htime->variance = 0.; + + hnb_event = new FrequencyDistribution*[itime + 1]; + for (i = 0;i < itime;i++) { + hnb_event[i] = NULL; + } + hnb_event[itime] = new FrequencyDistribution(inb_event); + + mixture = new FrequencyDistribution(inb_event); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a TimeEvents object. + * + * \param[in] timev reference on a TimeEvents object. 
+ */ +/*--------------------------------------------------------------*/ + +void TimeEvents::copy(const TimeEvents &timev) + +{ + int i; + + + // copy of the triplets {observation period, number of events, frequency} + + nb_element = timev.nb_element; + nb_class = timev.nb_class; + + time = new int[nb_class]; + nb_event = new int[nb_class]; + frequency = new int[nb_class]; + + for (i = 0;i < nb_class;i++) { + time[i] = timev.time[i]; + nb_event[i] = timev.nb_event[i]; + frequency[i] = timev.frequency[i]; + } + + // copy of the observation period frequency distribution and + // the number of events frequency distributions + + htime = new FrequencyDistribution(*(timev.htime)); + + hnb_event = new FrequencyDistribution*[htime->nb_value]; + + for (i = 0;i < htime->offset;i++) { + hnb_event[i] = NULL; + } + + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i] > 0) { + hnb_event[i] = new FrequencyDistribution(*(timev.hnb_event[i])); + } + else { + hnb_event[i] = NULL; + } + } + + mixture = new FrequencyDistribution(*(timev.mixture)); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Merging of TimeEvents objects. + * + * \param[in] nb_sample number of TimeEvents objects, + * \param[in] ptimev pointer on the TimeEvents objects. 
+ */ +/*--------------------------------------------------------------*/ + +void TimeEvents::merge(int nb_sample , const TimeEvents **ptimev) + +{ + int i , j; + int nb_histo; + const FrequencyDistribution **phisto; + + + nb_element = 0; + for (i = 0;i < nb_sample;i++) { + nb_element += ptimev[i]->nb_element; + } + + phisto = new const FrequencyDistribution*[nb_sample]; + + // merging of the observation period frequency distributions + + for (i = 0;i < nb_sample;i++) { + phisto[i] = ptimev[i]->htime; + } + htime = new FrequencyDistribution(nb_sample , phisto); + + // merging of the number of events frequency distributions for a given observation period + + hnb_event = new FrequencyDistribution*[htime->nb_value]; + + for (i = 0;i < htime->offset;i++) { + hnb_event[i] = NULL; + } + + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i]) { + nb_histo = 0; + for (j = 0;j < nb_sample;j++) { + if ((i < ptimev[j]->htime->nb_value) && (ptimev[j]->hnb_event[i])) { + phisto[nb_histo++] = ptimev[j]->hnb_event[i]; + } + } + hnb_event[i] = new FrequencyDistribution(nb_histo , phisto); + } + + else { + hnb_event[i] = NULL; + } + } + + for (i = 0;i < nb_sample;i++) { + phisto[i] = ptimev[i]->mixture; + } + mixture = new FrequencyDistribution(nb_sample , phisto); + + delete [] phisto; + + build_sample(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Merging of TimeEvents objects. + * + * \param[in] nb_sample number of TimeEvents objects, + * \param[in] itimev pointer on the TimeEvents objects. + * + * \return TimeEvents object. 
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::merge(int nb_sample , const vector &itimev) const + +{ + int i; + TimeEvents *timev; + const TimeEvents **ptimev; + + + nb_sample++; + ptimev = new const TimeEvents*[nb_sample]; + + ptimev[0] = this; + for (i = 1;i < nb_sample;i++) { + ptimev[i] = new TimeEvents(itimev[i - 1]); + } + + timev = new TimeEvents(nb_sample , ptimev); + + for (i = 1;i < nb_sample;i++) { + delete ptimev[i]; + } + delete [] ptimev; + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of the data members of a TimeEvents object. + */ +/*--------------------------------------------------------------*/ + +void TimeEvents::remove() + +{ + int i; + + + delete [] time; + delete [] nb_event; + delete [] frequency; + + if (hnb_event) { + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i] > 0) { + delete hnb_event[i]; + } + } + delete [] hnb_event; + } + + delete htime; + delete mixture; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the TimeEvents class. + */ +/*--------------------------------------------------------------*/ + +TimeEvents::~TimeEvents() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the TimeEvents class. + * + * \param[in] timev reference on a TimeEvents object. + * + * \return TimeEvents object. + */ +/*--------------------------------------------------------------*/ + +TimeEvents& TimeEvents::operator=(const TimeEvents &timev) + +{ + if (&timev != this) { + remove(); + copy(timev); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the number of events frequency distribution for a given observation period or + * of the mixture the number of events frequency distributions. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] histo_type frequency distribution type (NB_EVENT/NB_EVENT_MIXTURE), + * \param[in] itime observation period. + * + * \return DiscreteDistributionData object. + */ +/*--------------------------------------------------------------*/ + +DiscreteDistributionData* TimeEvents::extract(StatError &error , renewal_distribution histo_type , + int itime) const + +{ + DiscreteDistributionData *histo; + + + error.init(); + + if (histo_type == NB_EVENT) { + if ((itime < htime->offset) || (itime >= htime->nb_value) || (htime->frequency[itime] == 0)) { + histo = NULL; + error.update(SEQ_error[SEQR_OBSERVATION_TIME]); + } + else { + histo = new DiscreteDistributionData(*hnb_event[itime]); + } + } + + else if (histo_type == NB_EVENT_MIXTURE) { + histo = new DiscreteDistributionData(*mixture); + } + + return histo; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Change of the time unit of a TimeEvents object. + * + * \param[in] error reference on a StatError object, + * \param[in] scaling_coeff scaling factor. + * + * \return TimeEvents object. 
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::time_scaling(StatError &error , int scaling_coeff) const + +{ + bool status = true; + int i; + TimeEvents *timev; + + + timev = NULL; + error.init(); + + if (scaling_coeff < 1) { + status = false; + error.update(STAT_error[STATR_SCALING_COEFF]); + } + if ((htime->nb_value - 1) * scaling_coeff > MAX_TIME) { + status = false; + error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]); + } + + if (status) { + timev = new TimeEvents(nb_class); + + timev->nb_element = nb_element; + + for (i = 0;i < nb_class;i++) { + timev->time[i] = time[i] * scaling_coeff; + timev->nb_event[i] = nb_event[i]; + timev->frequency[i] = frequency[i]; + } + + timev->build_frequency_distribution(); + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of triplets {observation period, number of events, frequency} on + * an observation period criterion. + * + * \param[in] error reference on a StatError object, + * \param[in] min_time minimum observation period, + * \param[in] max_time maximum observation period. + * + * \return TimeEvents object. 
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::time_select(StatError &error , int min_time , int max_time) const + +{ + bool status = true; + int i , j; + int bnb_class; + TimeEvents *timev; + + + timev = NULL; + error.init(); + + if ((min_time < 1) || (min_time > max_time)) { + status = false; + error.update(SEQ_error[SEQR_MIN_TIME]); + } + if ((max_time < htime->offset) || (max_time < min_time)) { + status = false; + error.update(SEQ_error[SEQR_MAX_TIME]); + } + + if (status) { + + // computation of the number of classes + + bnb_class = 0; + for (i = 0;i < nb_class;i++) { + if ((time[i] >= min_time) && (time[i] <= max_time)) { + bnb_class++; + } + } + + // copy of the selected triplets + + timev = new TimeEvents(bnb_class); + + i = 0; + for (j = 0;j < nb_class;j++) { + if ((time[j] >= min_time) && (time[j] <= max_time)) { + timev->time[i] = time[j]; + timev->nb_event[i] = nb_event[j]; + timev->frequency[i] = frequency[j]; + i++; + } + } + + timev->nb_element_computation(); + + if (timev->nb_element > 0) { + timev->build_frequency_distribution(); + } + + else { + delete timev; + timev = NULL; + error.update(SEQ_error[SEQR_EMPTY_RENEWAL_DATA]); + } + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Selection of triplets {observation period, number of events, frequency} on + * a number of events criterion. + * + * \param[in] error reference on a StatError object, + * \param[in] min_nb_event minimum number of events, + * \param[in] max_nb_event maximum number of events. 
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::nb_event_select(StatError &error , int min_nb_event , int max_nb_event) const + +{ + bool status = true; + int i , j; + int bnb_class; + TimeEvents *timev; + + + timev = NULL; + error.init(); + + if ((min_nb_event < 0) || (min_nb_event > max_nb_event)) { + status = false; + error.update(SEQ_error[SEQR_MIN_NB_EVENT]); + } + if ((max_nb_event < mixture->offset) || (max_nb_event < min_nb_event)) { + status = false; + error.update(SEQ_error[SEQR_MAX_NB_EVENT]); + } + + if (status) { + + // computation of the number of classes + + bnb_class = 0; + for (i = 0;i < nb_class;i++) { + if ((nb_event[i] >= min_nb_event) && (nb_event[i] <= max_nb_event)) { + bnb_class++; + } + } + + // copy of the selected triplets + + timev = new TimeEvents(bnb_class); + + i = 0; + for (j = 0;j < nb_class;j++) { + if ((nb_event[j] >= min_nb_event) && (nb_event[j] <= max_nb_event)) { + timev->time[i] = time[j]; + timev->nb_event[i] = nb_event[j]; + timev->frequency[i] = frequency[j]; + i++; + } + } + + timev->nb_element_computation(); + + if (timev->nb_element > 0) { + timev->build_frequency_distribution(); + } + + else { + delete timev; + timev = NULL; + error.update(SEQ_error[SEQR_EMPTY_RENEWAL_DATA]); + } + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a TimeEvents object from a FrequencyDistribution object. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_event reference on a FrequencyDistribution object, + * \param[in] itime observation period. + * + * \return TimeEvents object. 
+ */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::build(StatError &error , FrequencyDistribution &nb_event , int itime) + +{ + bool status = true; + TimeEvents *timev; + + + timev = NULL; + error.init(); + + if ((nb_event.nb_value == 1) || (itime / (nb_event.nb_value - 1) < MIN_INTER_EVENT)) { + status = false; + error.update(SEQ_error[SEQR_SHORT_OBSERVATION_TIME]); + } + if (itime > MAX_TIME) { + status = false; + error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]); + } + + if (status) { + timev = new TimeEvents(nb_event , itime); + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a TimeEvents object from pairs {observation period, number of events}. + * + * \param[in] error reference on a StatError object, + * \param[in] itime pairs {observation period, number of events}. + * + * \return TimeEvents object. + */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::build(StatError &error , const vector > &time_nb_event) + +{ + bool status = true; + int i; + int inb_element , *itime , *inb_event; + TimeEvents *timev; + + + timev = NULL; + error.init(); + + if (!time_nb_event.empty()) { + inb_element = time_nb_event.size(); + + for (i = 0;i < inb_element;i++) { + if (time_nb_event[i].size() != 2) { + status = false; + error.update(SEQ_error[SEQR_TIME_NB_EVENT_PAIR] , i); + } + } + } + else { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + + if (status) { + itime = new int[inb_element]; + inb_event = new int[inb_element]; + + for (i = 0;i < inb_element;i++) { + itime[i] = time_nb_event[i][0]; + inb_event[i] = time_nb_event[i][1]; + } + + timev = new TimeEvents(); + timev->build(inb_element , itime , inb_event); + + delete [] itime; + delete [] inb_event; + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction 
of a TimeEvents object from a file whose format is: + * one triplet {observation period > 0, number of events >= 0, frequency >= 0} per line. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return TimeEvents object. + */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::ascii_read(StatError &error , const string path) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + bool status , lstatus; + int i , j; + int line , nb_class , nb_element , value , time , nb_event; + TimeEvents *timev; + ifstream in_file(path.c_str()); + + + timev = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + + // 1st pass: analysis of each line format and search for + // the number of triplets {observation period, number of events, frequency} + + status = true; + line = 0; + time = 0; + nb_event = -1; + nb_class = 0; + nb_element = 0; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + if (i <= 2) { + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + // test observation period > 0, number of events >= 0, frequency >= 0 + + if ((lstatus) && (((i == 0) && (value <= 0)) || + ((i > 0) && (value < 0)))) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_DATA_TYPE] , line , i + 1); + } + + else { + switch (i) { + + // test ordered samples (observation period) + + case 0 : { + if (value < time) { + status = false; + 
error.update(SEQ_parsing[SEQP_TIME_ORDER] , line , i + 1); + } + else if (value > time) { + time = value; + nb_event = -1; + } + if (value > MAX_TIME) { + status = false; + error.update(SEQ_parsing[SEQP_MAX_TIME] , line , i + 1); + } + break; + } + + // test ordered samples (number of events) + + case 1 : { + if (value <= nb_event) { + status = false; + error.update(SEQ_parsing[SEQP_NB_EVENT_ORDER] , line , i + 1); + } + else { + nb_event = value; + } + break; + } + + case 2 : { + if (value > 0) { + nb_class++; + nb_element += value; + } + break; + } + } + } + } + + i++; + } + + // test 3 items per line + + if ((i > 0) && (i != 3)) { + status = false; + error.correction_update(STAT_parsing[STATP_NB_TOKEN] , 3 , line); + } + } + + if (nb_element == 0) { + status = false; + error.update(STAT_parsing[STATP_EMPTY_SAMPLE]); + } + + // 2nd pass: data reading + + if (status) { +// in_file.close(); +// in_file.open(path.c_str() , ios::in); + in_file.clear(); + in_file.seekg(0,ios::beg); + + timev = new TimeEvents(nb_class); + timev->nb_element = nb_element; + + i = 0; + + while (getline(in_file , buffer)) { + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + j = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (j) { + case 0 : +// timev->time[i] = stoi(*token); in C++ 11 + timev->time[i] = atoi(token->c_str()); + break; + case 1 : +// timev->nb_event[i] = stoi(*token); in C++ 11 + timev->nb_event[i] = atoi(token->c_str()); + break; + case 2 : +// timev->frequency[i] = stoi(*token); in C++ 11 + timev->frequency[i] = atoi(token->c_str()); + break; + } + + j++; + } + + if (timev->frequency[i] > 0) { + i++; + } + } + + timev->build_frequency_distribution(); + } + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a TimeEvents object from a file whose format 
is: + * one pair {observation period > 0, number of events >= 0} per line. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return TimeEvents object. + */ +/*--------------------------------------------------------------*/ + +TimeEvents* TimeEvents::old_ascii_read(StatError &error , const string path) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + bool status , lstatus; + int i , j; + int line , nb_element , value , *time , *nb_event; + TimeEvents *timev; + ifstream in_file(path.c_str()); + + + timev = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + + // 1st pass: analysis of each line format and search for + // the number of pairs {observation period, number of events} + + status = true; + line = 0; + nb_element = 0; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + if (i <= 1) { + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + // test observation period > 0, number of events >= 0 + + if ((lstatus) && (((i == 0) && ((value <= 0) || (value > MAX_TIME))) || + ((i == 1) && (value < 0)))) { + lstatus = false; + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_DATA_TYPE] , line , i + 1); + } + } + + i++; + } + + // test 2 items per line + + if (i > 0) { + if (i != 2) { + status = false; + error.correction_update(STAT_parsing[STATP_NB_TOKEN] , 2 , line); + } + nb_element++; + } + } + + if (nb_element == 0) { + status = false; + 
error.update(STAT_parsing[STATP_FORMAT] , line); + } + + // 2nd pass: data reading + + if (status) { +// in_file.close(); +// in_file.open(path.c_str() , ios::in); + in_file.clear(); + in_file.seekg(0,ios::beg); + + time = new int[nb_element]; + nb_event = new int[nb_element]; + i = 0; + + while (getline(in_file , buffer)) { + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + j = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (j) { + case 0 : +// time[i] = stoi(*token); in C++ 11 + time[i] = atoi(token->c_str()); + break; + case 1 : +// nb_event[i] = stoi(*token); in C++ 11 + nb_event[i] = atoi(token->c_str()); + break; + } + + j++; + } + + i++; + } + + timev = new TimeEvents(nb_element , time , nb_event); + + delete [] time; + delete [] nb_event; + } + } + + return timev; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a TimeEvents object. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& TimeEvents::line_write(ostream &os) const + +{ + os << STAT_label[STATL_SAMPLE_SIZE] << ": " << nb_element << " " + << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_MEAN] << ": " << htime->mean << " " + << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_MEAN] << ": " << mixture->mean; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a TimeEvents object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level, + * \param[in] type renewal process type (ORDINARY/EQUILIBRIUM). + */ +/*--------------------------------------------------------------*/ + +ostream& TimeEvents::ascii_write(ostream &os , bool exhaustive , process_type type) const + +{ + int i; + + + if ((htime->variance > 0.) 
&& (exhaustive)) { + os << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + htime->ascii_characteristic_print(os); + + os << "\n | " << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + htime->ascii_print(os); + } + + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i] > 0) { + if (((exhaustive) && (htime->variance > 0.)) || (i > htime->offset)) { + os << "\n"; + } + os << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " + << i << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + hnb_event[i]->ascii_characteristic_print(os); + os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << ": " + << hnb_event[i]->variance / hnb_event[i]->mean << endl; + if (hnb_event[i]->variance > 0.) { + os << STAT_label[STATL_SKEWNESS_COEFF] << ": " << hnb_event[i]->skewness_computation() << " " + << STAT_label[STATL_KURTOSIS_COEFF] << ": " << hnb_event[i]->kurtosis_computation() << endl; + } + + switch (type) { + + case ORDINARY : { + os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << hnb_event[i]->nb_element + << " (" << 1. / (hnb_event[i]->mean + 1.) << ")" << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << hnb_event[i]->mean * + hnb_event[i]->nb_element << " (" + << hnb_event[i]->mean / (hnb_event[i]->mean + 1.) << ")" << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " << hnb_event[i]->frequency[0] + << " (" << hnb_event[i]->frequency[0] / (hnb_event[i]->nb_element * (hnb_event[i]->mean + 1.)) + << ")" << endl; + + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << (hnb_event[i]->nb_element - + hnb_event[i]->frequency[0]) * 2 << " (" + << (hnb_event[i]->nb_element - hnb_event[i]->frequency[0]) * 2. 
/ + (hnb_event[i]->nb_element * (hnb_event[i]->mean + 1.)) << ")" << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << (hnb_event[i]->mean - 1.) * + hnb_event[i]->nb_element + hnb_event[i]->frequency[0] << " (" + << (hnb_event[i]->mean - 1. + (double)hnb_event[i]->frequency[0] / + (double)hnb_event[i]->nb_element) / (hnb_event[i]->mean + 1.) << ")" << endl; + break; + } + } + + if (exhaustive) { + os << "\n | " << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " + << i << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + hnb_event[i]->ascii_print(os); + } + } + } + + if ((htime->variance > 0.) && (exhaustive)) { + os << "\n" << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + mixture->ascii_characteristic_print(os); + + switch (type) { + + case ORDINARY : { + os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << mixture->nb_element + << " (" << 1. / (mixture->mean + 1.) << ")" << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << mixture->mean * + mixture->nb_element << " (" + << mixture->mean / (mixture->mean + 1.) << ")" << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " << mixture->frequency[0] + << " (" << mixture->frequency[0] / (mixture->nb_element * (mixture->mean + 1.)) + << ")" << endl; + + os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << (mixture->nb_element - + mixture->frequency[0]) * 2 << " (" + << (mixture->nb_element - mixture->frequency[0]) * 2. / + (mixture->nb_element * (mixture->mean + 1.)) << ")" << endl; + + os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << (mixture->mean - 1.) * + mixture->nb_element + mixture->frequency[0] << " (" + << (mixture->mean - 1. + (double)mixture->frequency[0] / + (double)mixture->nb_element) / (mixture->mean + 1.) 
<< ")" << endl; + break; + } + } + + os << "\n | " << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + mixture->ascii_print(os); + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a TimeEvents object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& TimeEvents::ascii_write(ostream &os , bool exhaustive) const + +{ + return ascii_write(os , exhaustive , DEFAULT_TYPE); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a TimeEvents object in a file. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level, + * \param[in] type renewal process type (ORDINARY/EQUILIBRIUM). + */ +/*--------------------------------------------------------------*/ + +ostream& TimeEvents::ascii_file_write(ostream &os , bool exhaustive , process_type type) const + +{ + int i; + int max_frequency , width[3]; + + + if ((htime->variance > 0.) 
&& (exhaustive)) { + os << "# " << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + htime->ascii_characteristic_print(os , false , true); + + os << "\n# | " << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + htime->ascii_print(os , true); + } + + // computation of the column widths + + if (exhaustive) { + width[0] = column_width(time[nb_class - 1]); + width[1] = column_width(nb_event[nb_class - 1]) + ASCII_SPACE; + + max_frequency = 0; + for (i = 0;i < nb_class;i++) { + if (frequency[i] > max_frequency) { + max_frequency = frequency[i]; + } + } + + width[2] = column_width(max_frequency) + ASCII_SPACE; + } + + for (i = 0;i < nb_class;i++) { + if ((i == 0) || ((i > 0) && (time[i] > time[i - 1]))) { + if (((exhaustive) && (htime->variance > 0.)) || (i > 0)) { + os << "\n"; + } + if (exhaustive) { + os << "# "; + } + os << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " + << time[i] << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + hnb_event[time[i]]->ascii_characteristic_print(os , false , exhaustive); + if (exhaustive) { + os << "# "; + } + os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << ": " + << hnb_event[time[i]]->variance / hnb_event[time[i]]->mean << endl; + if (hnb_event[time[i]]->variance > 0.) { + if (exhaustive) { + os << "# "; + } + os << STAT_label[STATL_SKEWNESS_COEFF] << ": " << hnb_event[time[i]]->skewness_computation() << " " + << STAT_label[STATL_KURTOSIS_COEFF] << ": " << hnb_event[time[i]]->kurtosis_computation() << endl; + } + + switch (type) { + + case ORDINARY : { + os << "\n# " << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << hnb_event[time[i]]->nb_element + << " (" << 1. / (hnb_event[time[i]]->mean + 1.) 
<< ")" << endl; + + os << "# " << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << hnb_event[time[i]]->mean * + hnb_event[time[i]]->nb_element << " (" + << hnb_event[time[i]]->mean / (hnb_event[time[i]]->mean + 1.) << ")" << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n# " << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " << hnb_event[time[i]]->frequency[0] + << " (" << hnb_event[time[i]]->frequency[0] / (hnb_event[time[i]]->nb_element * (hnb_event[time[i]]->mean + 1.)) + << ")" << endl; + + os << "# " << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << (hnb_event[time[i]]->nb_element - + hnb_event[time[i]]->frequency[0]) * 2 << " (" + << (hnb_event[time[i]]->nb_element - hnb_event[time[i]]->frequency[0]) * 2. / + (hnb_event[time[i]]->nb_element * (hnb_event[time[i]]->mean + 1.)) << ")" << endl; + + os << "# " << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << (hnb_event[time[i]]->mean - 1.) * + hnb_event[time[i]]->nb_element + hnb_event[time[i]]->frequency[0] << " (" + << (hnb_event[time[i]]->mean - 1. + (double)hnb_event[time[i]]->frequency[0] / + (double)hnb_event[time[i]]->nb_element) / (hnb_event[time[i]]->mean + 1.) << ")" << endl; + break; + } + } + + if (exhaustive) { + os << "\n"; + } + } + + // writing of the triplets (observation period, number of events, frequency) + + if (exhaustive) { + os << setw(width[0]) << time[i]; + os << setw(width[1]) << nb_event[i]; + os << setw(width[2]) << frequency[i] << endl; + } + } + + if ((htime->variance > 0.) && (exhaustive)) { + os << "\n# " << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + mixture->ascii_characteristic_print(os , false , true); + + switch (type) { + + case ORDINARY : { + os << "\n# " << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << mixture->nb_element + << " (" << 1. / (mixture->mean + 1.) 
<< ")" << endl; + + os << "# " << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << mixture->mean * + mixture->nb_element << " (" + << mixture->mean / (mixture->mean + 1.) << ")" << endl; + break; + } + + case EQUILIBRIUM : { + os << "\n# " << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << ": " << mixture->frequency[0] + << " (" << mixture->frequency[0] / (mixture->nb_element * (mixture->mean + 1.)) + << ")" << endl; + + os << "# " << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << ": " << (mixture->nb_element - + mixture->frequency[0]) * 2 << " (" + << (mixture->nb_element - mixture->frequency[0]) * 2. / + (mixture->nb_element * (mixture->mean + 1.)) << ")" << endl; + + os << "# " << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << ": " << (mixture->mean - 1.) * + mixture->nb_element + mixture->frequency[0] << " (" + << (mixture->mean - 1. + (double)mixture->frequency[0] / + (double)mixture->nb_element) / (mixture->mean + 1.) << ")" << endl; + break; + } + } + + os << "\n# | " << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + mixture->ascii_print(os , true); + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a TimeEvents object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool TimeEvents::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + ascii_file_write(out_file , exhaustive); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a TimeEvents object at the spreadsheet format. 
+ *
+ *  \param[in,out] os   stream,
+ *  \param[in]     type renewal process type (ORDINARY/EQUILIBRIUM).
+ *
+ *  \return stream (os, after writing).
+ */
+/*--------------------------------------------------------------*/
+
+ostream& TimeEvents::spreadsheet_write(ostream &os , process_type type) const
+
+{
+  int i;
+
+
+  // observation period frequency distribution, written only when
+  // several distinct observation periods are present (variance > 0)
+
+  if (htime->variance > 0.) {
+    os << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t";
+    htime->spreadsheet_characteristic_print(os);
+
+    os << "\n\t" << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
+    htime->spreadsheet_print(os);
+  }
+
+  // one section per observed observation period i (htime->frequency[i] > 0)
+
+  for (i = htime->offset;i < htime->nb_value;i++) {
+    if (htime->frequency[i] > 0) {
+
+      // blank separator line except before the very first section
+
+      if ((htime->variance > 0.) || (i > htime->offset)) {
+        os << "\n";
+      }
+      os << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " "
+         << i << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t";
+      hnb_event[i]->spreadsheet_characteristic_print(os);
+
+      // NOTE(review): the ratio is written without checking that the mean is non-zero
+
+      os << STAT_label[STATL_VARIANCE_MEAN_RATIO] << "\t"
+         << hnb_event[i]->variance / hnb_event[i]->mean << endl;
+      if (hnb_event[i]->variance > 0.) {
+        os << STAT_label[STATL_SKEWNESS_COEFF] << "\t" << hnb_event[i]->skewness_computation() << "\t"
+           << STAT_label[STATL_KURTOSIS_COEFF] << "\t" << hnb_event[i]->kurtosis_computation() << endl;
+      }
+
+      // counts (and proportions) of censored and complete inter-event intervals,
+      // derived from the number of events frequency distribution
+
+      switch (type) {
+
+      case ORDINARY : {
+        os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << hnb_event[i]->nb_element
+           << "\t" << 1. / (hnb_event[i]->mean + 1.) << endl;
+
+        os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << hnb_event[i]->mean *
+              hnb_event[i]->nb_element << "\t"
+           << hnb_event[i]->mean / (hnb_event[i]->mean + 1.) << endl;
+        break;
+      }
+
+      case EQUILIBRIUM : {
+        os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << "\t" << hnb_event[i]->frequency[0] << "\t"
+           << hnb_event[i]->frequency[0] / (hnb_event[i]->nb_element * (hnb_event[i]->mean + 1.)) << endl;
+
+        os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << (hnb_event[i]->nb_element -
+               hnb_event[i]->frequency[0]) * 2 << "\t"
+           << (hnb_event[i]->nb_element - hnb_event[i]->frequency[0]) * 2. /
+              (hnb_event[i]->nb_element * (hnb_event[i]->mean + 1.)) << endl;
+
+        os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << (hnb_event[i]->mean - 1.) *
+              hnb_event[i]->nb_element + hnb_event[i]->frequency[0] << "\t"
+           << (hnb_event[i]->mean - 1. + (double)hnb_event[i]->frequency[0] /
+               (double)hnb_event[i]->nb_element) / (hnb_event[i]->mean + 1.) << endl;
+        break;
+      }
+      }
+
+      os << "\n\t" << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " "
+         << i << " " << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
+      hnb_event[i]->spreadsheet_print(os);
+    }
+  }
+
+  // mixture of the number of events frequency distributions:
+  // same layout as above, written only for several observation periods
+
+  if (htime->variance > 0.) {
+    os << "\n" << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t";
+    mixture->spreadsheet_characteristic_print(os);
+
+    switch (type) {
+
+    case ORDINARY : {
+      os << "\n" << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << mixture->nb_element
+         << "\t" << 1. / (mixture->mean + 1.) << endl;
+
+      os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << mixture->mean *
+            mixture->nb_element << "\t"
+         << mixture->mean / (mixture->mean + 1.) << endl;
+      break;
+    }
+
+    case EQUILIBRIUM : {
+      os << "\n" << SEQ_label[SEQL_2_CENSORED_INTER_EVENT] << "\t" << mixture->frequency[0] << "\t"
+         << mixture->frequency[0] / (mixture->nb_element * (mixture->mean + 1.)) << endl;
+
+      os << SEQ_label[SEQL_1_CENSORED_INTER_EVENT] << "\t" << (mixture->nb_element -
+             mixture->frequency[0]) * 2 << "\t"
+         << (mixture->nb_element - mixture->frequency[0]) * 2. /
+            (mixture->nb_element * (mixture->mean + 1.)) << endl;
+
+      os << SEQ_label[SEQL_COMPLETE_INTER_EVENT] << "\t" << (mixture->mean - 1.) *
+            mixture->nb_element + mixture->frequency[0] << "\t"
+         << (mixture->mean - 1. + (double)mixture->frequency[0] /
+             (double)mixture->nb_element) / (mixture->mean + 1.) << endl;
+      break;
+    }
+    }
+
+    os << "\n\t" << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
+    mixture->spreadsheet_print(os);
+  }
+
+  return os;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a TimeEvents object in a file at the spreadsheet format.
+ *
+ *  The file is opened on construction of the output stream; when the
+ *  stream cannot be opened, STATR_FILE_NAME is recorded in error and
+ *  nothing is written.
+ *
+ *  \param[in] error reference on a StatError object,
+ *  \param[in] path  file path.
+ *
+ *  \return error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool TimeEvents::spreadsheet_write(StatError &error , const string path) const
+
+{
+  bool status;
+  ofstream out_file(path.c_str());
+
+
+  error.init();
+
+  if (!out_file) {
+    status = false;
+    error.update(STAT_error[STATR_FILE_NAME]);
+  }
+
+  else {
+    status = true;
+
+    // presumably relies on a default value of the process type argument
+    // declared in the class header - TODO confirm
+
+    spreadsheet_write(out_file);
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a TimeEvents object using Gnuplot.
+ *
+ *  \param[in] error  reference on a StatError object,
+ *  \param[in] prefix file prefix,
+ *  \param[in] title  figure title.
+ *
+ *  \return error status.
+ */
+/*--------------------------------------------------------------*/
+
+bool TimeEvents::plot_write(StatError &error , const char *prefix ,
+                            const char *title) const
+
+{
+  bool status;
+  int i , j , k;
+  int nb_histo;
+  const FrequencyDistribution **phisto;
+  ostringstream data_file_name;
+
+
+  error.init();
+
+  // writing of data file
+
+  data_file_name << prefix << ".dat";
+
+  // first pass: count the frequency distributions passed in addition to
+  // hnb_event[htime->offset] (the object plot_print is called on)
+
+  nb_histo = 0;
+  for (i = htime->offset + 1;i < htime->nb_value;i++) {
+    if (htime->frequency[i] > 0) {
+      nb_histo++;
+    }
+  }
+  if (htime->variance > 0.) {
+    nb_histo += 2;
+  }
+
+  phisto = new const FrequencyDistribution*[nb_histo];
+
+  // second pass: fill the array (observation period, number of events
+  // for the remaining observation periods, mixture)
+
+  nb_histo = 0;
+  if (htime->variance > 0.) {
+    phisto[nb_histo++] = htime;
+  }
+  for (i = htime->offset + 1;i < htime->nb_value;i++) {
+    if (htime->frequency[i] > 0) {
+      phisto[nb_histo++] = hnb_event[i];
+    }
+  }
+  if (htime->variance > 0.) {
+    phisto[nb_histo++] = mixture;
+  }
+
+  // NOTE(review): the scripts below number the data columns starting with htime
+  // (when htime->variance > 0) and then hnb_event[htime->offset]; confirm that this
+  // matches the column order written by plot_print, which is called on
+  // hnb_event[htime->offset] with htime as first element of phisto
+
+  status = hnb_event[htime->offset]->plot_print((data_file_name.str()).c_str() , nb_histo , phisto);
+
+  delete [] phisto;
+
+  if (!status) {
+    error.update(STAT_error[STATR_FILE_PREFIX]);
+  }
+
+  // writing of script files
+
+  else {
+
+    // i = 0: interactive script (.plot), i = 1: PostScript script (.print)
+
+    for (i = 0;i < 2;i++) {
+
+      // index of the data column used by the next plot command
+
+      j = 1;
+
+      ostringstream file_name[2];
+
+      switch (i) {
+      case 0 :
+        file_name[0] << prefix << ".plot";
+        break;
+      case 1 :
+        file_name[0] << prefix << ".print";
+        break;
+      }
+
+      ofstream out_file((file_name[0].str()).c_str());
+
+      if (i == 1) {
+        out_file << "set terminal postscript" << endl;
+        file_name[1] << label(prefix) << ".ps";
+        out_file << "set output \"" << file_name[1].str() << "\"\n\n";
+      }
+
+      out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n"
+               << "set title";
+      if (title) {
+        out_file << " \"" << title << "\"";
+      }
+      out_file << "\n\n";
+
+      // observation period frequency distribution
+
+      if (htime->variance > 0.) {
+        if (htime->nb_value - 1 < TIC_THRESHOLD) {
+          out_file << "set xtics 0,1" << endl;
+        }
+        if ((int)(htime->max * YSCALE) + 1 < TIC_THRESHOLD) {
+          out_file << "set ytics 0,1" << endl;
+        }
+
+        out_file << "plot [0:" << htime->nb_value - 1 << "] [0:"
+                 << (int)(htime->max * YSCALE) + 1 << "] \""
+                 << label((data_file_name.str()).c_str()) << "\" using " << j++
+                 << " title \"" << SEQ_label[SEQL_OBSERVATION_TIME] << " "
+                 << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl;
+
+        if (htime->nb_value - 1 < TIC_THRESHOLD) {
+          out_file << "set xtics autofreq" << endl;
+        }
+        if ((int)(htime->max * YSCALE) + 1 < TIC_THRESHOLD) {
+          out_file << "set ytics autofreq" << endl;
+        }
+      }
+
+      // number of events frequency distribution for each observation period
+
+      for (k = htime->offset;k < htime->nb_value;k++) {
+        if (htime->frequency[k] > 0) {
+
+          // interactive script only: wait for the user between two figures
+
+          if (((htime->variance > 0.) || (k > htime->offset)) && (i == 0)) {
+            out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl;
+          }
+          out_file << endl;
+
+          if (hnb_event[k]->nb_value - 1 < TIC_THRESHOLD) {
+            out_file << "set xtics 0,1" << endl;
+          }
+          if ((int)(hnb_event[k]->max * YSCALE) + 1 < TIC_THRESHOLD) {
+            out_file << "set ytics 0,1" << endl;
+          }
+
+          out_file << "plot [0:" << hnb_event[k]->nb_value - 1 << "] [0:"
+                   << (int)(hnb_event[k]->max * YSCALE) + 1 << "] \""
+                   << label((data_file_name.str()).c_str()) << "\" using " << j++
+                   << " title \"" << SEQ_label[SEQL_NB_EVENT] << " "
+                   << SEQ_label[SEQL_DURING] << " " << k << " " << SEQ_label[SEQL_TIME_UNIT] << " "
+                   << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl;
+
+          if (hnb_event[k]->nb_value - 1 < TIC_THRESHOLD) {
+            out_file << "set xtics autofreq" << endl;
+          }
+          if ((int)(hnb_event[k]->max * YSCALE) + 1 < TIC_THRESHOLD) {
+            out_file << "set ytics autofreq" << endl;
+          }
+        }
+      }
+
+      // mixture of the number of events frequency distributions
+
+      if (htime->variance > 0.) {
+        if (i == 0) {
+          out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl;
+        }
+        out_file << endl;
+
+        if (mixture->nb_value - 1 < TIC_THRESHOLD) {
+          out_file << "set xtics 0,1" << endl;
+        }
+        if ((int)(mixture->max * YSCALE) + 1 < TIC_THRESHOLD) {
+          out_file << "set ytics 0,1" << endl;
+        }
+
+        out_file << "plot [0:" << mixture->nb_value - 1 << "] [0:"
+                 << (int)(mixture->max * YSCALE) + 1 << "] \""
+                 << label((data_file_name.str()).c_str()) << "\" using " << j
+                 << " title \"" << SEQ_label[SEQL_NB_EVENT] << " "
+                 << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl;
+
+        if (mixture->nb_value - 1 < TIC_THRESHOLD) {
+          out_file << "set xtics autofreq" << endl;
+        }
+        if ((int)(mixture->max * YSCALE) + 1 < TIC_THRESHOLD) {
+          out_file << "set ytics autofreq" << endl;
+        }
+      }
+
+      if (i == 1) {
+        out_file << "\nset terminal x11" << endl;
+      }
+
+      out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl;
+    }
+  }
+
+  return status;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Plot of a TimeEvents object.
+ *
+ *  Builds 1 plot (single observation period) or 3 plots (several observation
+ *  periods: observation period frequency distribution, number of events
+ *  frequency distributions, superimposed number of events frequency distributions).
+ *  The returned object is allocated with new and not released here.
+ *
+ *  \return MultiPlotSet object.
+ */
+/*--------------------------------------------------------------*/
+
+MultiPlotSet* TimeEvents::get_plotable() const
+
+{
+  int i , j , k , m;
+  int nb_plot_set , nb_histo , max_nb_value , max_frequency;
+  double shift;
+  const FrequencyDistribution *phisto[2] , **merged_histo;
+  ostringstream legend;
+  MultiPlotSet *plot_set;
+
+
+  nb_plot_set = 1;
+  if (htime->variance > 0.) {
+    nb_plot_set += 2;
+  }
+
+  plot_set = new MultiPlotSet(nb_plot_set);
+  MultiPlotSet &plot = *plot_set;
+
+  plot.border = "15 lw 0";
+
+  // i: index of the plot currently being filled
+
+  i = 0;
+  if (htime->variance > 0.) {
+
+    // observation period frequency distribution
+
+    plot[i].xrange = Range(0 , htime->nb_value - 1);
+    plot[i].yrange = Range(0 , ceil(htime->max * YSCALE));
+
+    if (htime->nb_value - 1 < TIC_THRESHOLD) {
+      plot[i].xtics = 1;
+    }
+    if (ceil(htime->max * YSCALE) < TIC_THRESHOLD) {
+      plot[i].ytics = 1;
+    }
+
+    plot[i].resize(1);
+
+    legend.str("");
+    legend << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION];
+    plot[i][0].legend = legend.str();
+
+    plot[i][0].style = "impulses";
+
+    htime->plotable_frequency_write(plot[i][0]);
+    i++;
+  }
+
+  // number of events frequency distribution for each observation period
+
+  nb_histo = 0;
+  max_nb_value = 0;
+  max_frequency = 0;
+
+  for (j = htime->offset;j < htime->nb_value;j++) {
+    if (htime->frequency[j] > 0) {
+      nb_histo++;
+
+      // computation of the maximum number of values and the maximum frequency
+
+      if (hnb_event[j]->nb_value > max_nb_value) {
+        max_nb_value = hnb_event[j]->nb_value;
+      }
+      if (hnb_event[j]->max > max_frequency) {
+        max_frequency = hnb_event[j]->max;
+      }
+    }
+  }
+
+  plot[i].xrange = Range(0 , max_nb_value);
+  plot[i].yrange = Range(0 , ceil(max_frequency * YSCALE));
+
+  if (max_nb_value < TIC_THRESHOLD) {
+    plot[i].xtics = 1;
+  }
+  if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) {
+    plot[i].ytics = 1;
+  }
+
+  plot[i].resize(nb_histo);
+
+  // each successive frequency distribution is drawn with an additional
+  // horizontal shift so that the impulses do not overlap
+
+  j = 0;
+  shift = 0.;
+
+  for (k = htime->offset;k < htime->nb_value;k++) {
+    if (htime->frequency[k] > 0) {
+      legend.str("");
+      legend << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << k << " "
+             << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION];
+      plot[i][j].legend = legend.str();
+
+      plot[i][j].style = "impulses";
+
+      for (m = hnb_event[k]->offset;m < hnb_event[k]->nb_value;m++) {
+        if (hnb_event[k]->frequency[m] > 0) {
+          plot[i][j].add_point(m + shift , hnb_event[k]->frequency[m]);
+        }
+      }
+
+      // the total shift is bounded by PLOT_MAX_SHIFT; the else branch is only
+      // reached when nb_histo > 1 (for nb_histo = 1 the test reads 0 < PLOT_MAX_SHIFT,
+      // presumably true for a positive constant), so the division is presumably safe
+
+      if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) {
+        shift += PLOT_SHIFT;
+      }
+      else {
+        shift += PLOT_MAX_SHIFT / (nb_histo - 1);
+      }
+
+      j++;
+    }
+  }
+
+  if (htime->variance > 0.) {
+    i++;
+
+    // superimposed number of events frequency distributions
+
+    merged_histo = new const FrequencyDistribution*[nb_histo];
+
+    // built from the longest observation period downwards: merged_histo[j] merges
+    // merged_histo[j + 1] with the frequency distribution of the current period
+
+    j = nb_histo - 1;
+    for (k = htime->nb_value - 1;k >= htime->offset;k--) {
+      if (htime->frequency[k] > 0) {
+        if (j == nb_histo - 1) {
+          merged_histo[j] = new FrequencyDistribution(*hnb_event[k]);
+        }
+
+        else {
+          phisto[0] = merged_histo[j + 1];
+          phisto[1] = hnb_event[k];
+          merged_histo[j] = new FrequencyDistribution(2 , phisto);
+        }
+
+        j--;
+      }
+    }
+
+    // merged_histo[0] accumulates all the periods and therefore fixes the ranges
+
+    plot[i].xrange = Range(0 , merged_histo[0]->nb_value - 1);
+    plot[i].yrange = Range(0 , ceil(merged_histo[0]->max * YSCALE));
+
+    if (merged_histo[0]->nb_value - 1 < TIC_THRESHOLD) {
+      plot[i].xtics = 1;
+    }
+    if (ceil(merged_histo[0]->max * YSCALE) < TIC_THRESHOLD) {
+      plot[i].ytics = 1;
+    }
+
+    plot[i].resize(nb_histo);
+
+    j = 0;
+    for (k = htime->offset;k < htime->nb_value;k++) {
+      if (htime->frequency[k] > 0) {
+        legend.str("");
+        legend << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << k << " "
+               << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION];
+        plot[i][j].legend = legend.str();
+
+        plot[i][j].style = "impulses";
+
+        merged_histo[j]->plotable_frequency_write(plot[i][j]);
+        j++;
+      }
+    }
+
+    // the temporary merged frequency distributions are owned by this function
+
+    for (j = 0;j < nb_histo;j++) {
+      delete merged_histo[j];
+    }
+    delete [] merged_histo;
+
+/*    plot[i].resize(1);
+
+    legend.str("");
+    legend << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION];
+    plot[i][0].legend = legend.str();
+
+    plot[i][0].style = "impulses";
+
+    mixture->plotable_frequency_write(plot[i][0]); */
+  }
+
+  return plot_set;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Search for the minimum mean time interval between 2 events.
+ *
+ *  \return minimum mean time interval between 2 events.
+ */
+/*--------------------------------------------------------------*/
+
+double TimeEvents::min_inter_event_computation() const
+
+{
+  int k;
+  double shortest , mean_interval;
+
+
+  // the observation period of the last class is the starting value: it is
+  // returned unchanged when no class has a strictly positive number of events
+
+  shortest = time[nb_class - 1];
+
+  for (k = 0;k < nb_class;k++) {
+    if (nb_event[k] <= 0) {
+      continue;
+    }
+
+    // mean time interval between 2 events for this class
+
+    mean_interval = (double)time[k] / (double)nb_event[k];
+    if (mean_interval < shortest) {
+      shortest = mean_interval;
+    }
+  }
+
+  return shortest;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the sample size of a TimeEvents object
+ *         (sum of the class frequencies, stored in nb_element).
+ */
+/*--------------------------------------------------------------*/
+
+void TimeEvents::nb_element_computation()
+
+{
+  int k = 0;
+
+
+  nb_element = 0;
+  while (k < nb_class) {
+    nb_element += frequency[k];
+    k++;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Default constructor of the RenewalData class:
+ *         EQUILIBRIUM process type, every pointer member set to NULL.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData::RenewalData()
+
+{
+  type = EQUILIBRIUM;
+
+  // no model, no sequence, no index curves
+
+  renewal = NULL;
+  sequence = NULL;
+  length = NULL;
+  index_event = NULL;
+
+  // no frequency distribution
+
+  forward = NULL;
+  backward = NULL;
+  length_bias = NULL;
+  within = NULL;
+  inter_event = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor of the RenewalData class.
+ *
+ *  \param[in] nb_element sample size,
+ *  \param[in] itime      observation period.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData::RenewalData(int nb_element , int itime)
+
+{
+  // members left empty by this constructor
+
+  renewal = NULL;
+  inter_event = NULL;
+  length_bias = NULL;
+  index_event = NULL;
+
+  type = EQUILIBRIUM;
+
+  // one length and one sequence slot per element (nb_element is the argument here);
+  // the sequences themselves are not allocated
+
+  length = new int[nb_element];
+  sequence = new int*[nb_element];
+
+  // frequency distributions sized from the observation period
+
+  within = new FrequencyDistribution(itime);
+  backward = new FrequencyDistribution(itime);
+  forward = new FrequencyDistribution(itime + 1);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a RenewalData object from a TimeEvents object.
+ *         Only the TimeEvents part is copied: every RenewalData pointer
+ *         member is set to NULL.
+ *
+ *  \param[in] timev reference on a TimeEvents object,
+ *  \param[in] itype renewal process type (ORDINARY/EQUILIBRIUM).
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData::RenewalData(const TimeEvents &timev , process_type itype)
+:TimeEvents(timev)
+
+{
+  type = itype;
+
+  // no model, no sequence, no index curves
+
+  renewal = NULL;
+  sequence = NULL;
+  length = NULL;
+  index_event = NULL;
+
+  // no frequency distribution
+
+  forward = NULL;
+  backward = NULL;
+  length_bias = NULL;
+  within = NULL;
+  inter_event = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Construction of a RenewalData object from a Renewal object.
+ *
+ *  \param[in] itype renewal process type (ORDINARY/EQUILIBRIUM),
+ *  \param[in] renew reference on a Renewal object.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData::RenewalData(process_type itype , const Renewal &renew)
+
+{
+  type = itype;
+
+  // no model, no sequence, no index curves
+
+  renewal = NULL;
+  length = NULL;
+  sequence = NULL;
+  index_event = NULL;
+
+  // frequency distributions built from the distributions of the Renewal object
+  // (within is built from the inter-event distribution, backward from
+  // the allocated number of values of the backward distribution)
+
+  inter_event = new FrequencyDistribution(*(renew.inter_event));
+  within = new FrequencyDistribution(*(renew.inter_event));
+  length_bias = new FrequencyDistribution(*(renew.length_bias));
+  backward = new FrequencyDistribution(renew.backward->alloc_nb_value);
+  forward = new FrequencyDistribution(*(renew.forward));
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Constructor by merging of the RenewalData class.
+ *         The TimeEvents parts are merged and the sequences are allocated
+ *         with the lengths of the source sequences; their content is not
+ *         copied and no frequency distribution is built here.
+ *
+ *  \param[in] nb_sample number of RenewalData objects,
+ *  \param[in] itimev    pointer on the RenewalData objects.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData::RenewalData(int nb_sample , const RenewalData **itimev)
+
+{
+  int sample , seq , out;
+  const RenewalData *source;
+  const TimeEvents **pbase;
+
+
+  // merging of the TimeEvents parts through base-class pointers
+
+  pbase = new const TimeEvents*[nb_sample];
+  for (sample = 0;sample < nb_sample;sample++) {
+    pbase[sample] = itimev[sample];
+  }
+
+  TimeEvents::merge(nb_sample , pbase);
+
+  delete [] pbase;
+
+  renewal = NULL;
+  index_event = NULL;
+
+  inter_event = NULL;
+  within = NULL;
+  length_bias = NULL;
+  backward = NULL;
+  forward = NULL;
+
+  type = itimev[0]->type;
+
+  // allocation of the sequences, sample after sample
+
+  length = new int[nb_element];
+  sequence = new int*[nb_element];
+
+  out = 0;
+  for (sample = 0;sample < nb_sample;sample++) {
+    source = itimev[sample];
+
+    for (seq = 0;seq < source->nb_element;seq++ , out++) {
+      length[out] = source->length[seq];
+      sequence[out] = new int[length[out]];
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a RenewalData object.
+ *
+ *  \param[in] timev      reference on a RenewalData object,
+ *  \param[in] model_flag flag copy of the Renewal object.
+ *
+ *  The TimeEvents part (in particular nb_element) is expected to have been
+ *  copied beforehand. Every member that is absent (NULL) in the source stays
+ *  NULL in the copy: objects built by the default constructor or from a
+ *  TimeEvents object carry neither sequences nor frequency distributions
+ *  and can therefore be copied safely.
+ */
+/*--------------------------------------------------------------*/
+
+void RenewalData::copy(const RenewalData &timev , bool model_flag)
+
+{
+  int i , j;
+
+
+  if ((model_flag) && (timev.renewal)) {
+    renewal = new Renewal(*(timev.renewal) , false);
+  }
+  else {
+    renewal = NULL;
+  }
+
+  type = timev.type;
+
+  // sequences of events: copied only when the source stores them
+
+  if ((timev.length) && (timev.sequence)) {
+    length = new int[nb_element];
+    sequence = new int*[nb_element];
+
+    for (i = 0;i < nb_element;i++) {
+      length[i] = timev.length[i];
+      sequence[i] = new int[length[i]];
+      for (j = 0;j < length[i];j++) {
+        sequence[i][j] = timev.sequence[i][j];
+      }
+    }
+  }
+
+  else {
+    length = NULL;
+    sequence = NULL;
+  }
+
+  // frequency distributions: each one is optional
+
+  inter_event = (timev.inter_event ? new FrequencyDistribution(*(timev.inter_event)) : NULL);
+  within = (timev.within ? new FrequencyDistribution(*(timev.within)) : NULL);
+  length_bias = (timev.length_bias ? new FrequencyDistribution(*(timev.length_bias)) : NULL);
+  backward = (timev.backward ? new FrequencyDistribution(*(timev.backward)) : NULL);
+  forward = (timev.forward ? new FrequencyDistribution(*(timev.forward)) : NULL);
+
+  index_event = (timev.index_event ? new Curves(*(timev.index_event)) : NULL);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destruction of the data members of a RenewalData object.
+ *
+ *  The pointers are not reset: the object must not be used afterwards
+ *  unless copy() is called to rebuild its members. nb_element is read to
+ *  release the sequences, so the TimeEvents part must still be intact.
+ */
+/*--------------------------------------------------------------*/
+
+void RenewalData::remove()
+
+{
+  delete renewal;
+
+  delete [] length;
+
+  if (sequence) {
+    int i;
+
+    for (i = 0;i < nb_element;i++) {
+      delete [] sequence[i];
+    }
+    delete [] sequence;
+  }
+
+  // deleting a NULL pointer is a no-op: absent distributions need no test
+
+  delete inter_event;
+  delete within;
+  delete length_bias;
+  delete backward;
+  delete forward;
+
+  delete index_event;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the RenewalData class.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData::~RenewalData()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the RenewalData class.
+ *         Self-assignment leaves the object untouched.
+ *
+ *  \param[in] timev reference on a RenewalData object.
+ *
+ *  \return RenewalData object.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData& RenewalData::operator=(const RenewalData &timev)
+
+{
+  if (&timev == this) {
+    return *this;
+  }
+
+  // release of the derived part first (it reads nb_element), then of the base part
+
+  remove();
+  TimeEvents::remove();
+
+  // rebuilding in the reverse order: base part, then derived part
+
+  TimeEvents::copy(timev);
+  copy(timev);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Merging of RenewalData objects.
+ *         The current object is the first sample of the merged object.
+ *
+ *  \param[in] error     reference on a StatError object,
+ *  \param[in] nb_sample number of RenewalData objects,
+ *  \param[in] itimev    pointer on the RenewalData objects.
+ *
+ *  \return RenewalData object (NULL if the samples are incompatible).
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData* RenewalData::merge(StatError &error , int nb_sample ,
+                                const RenewalData **itimev) const
+
+{
+  bool compatible = true;
+  int sample , seq , pos , out , nb_total;
+  const FrequencyDistribution **pdistrib;
+  const RenewalData **psample;
+  RenewalData *merged;
+
+
+  error.init();
+
+  // each sample must have the process type of the current object and must
+  // agree with it on the presence of the inter-event frequency distribution;
+  // samples are numbered from 2 in the messages, the current object being sample 1
+
+  for (sample = 0;sample < nb_sample;sample++) {
+    if ((itimev[sample]->type != type) ||
+        ((itimev[sample]->inter_event != NULL) != (inter_event != NULL))) {
+      compatible = false;
+      ostringstream error_message;
+      error_message << STAT_label[STATL_SAMPLE] << " " << sample + 2 << ": "
+                    << SEQ_error[SEQR_INCOMPATIBLE_RENEWAL_DATA];
+      error.update((error_message.str()).c_str());
+    }
+  }
+
+  if (!compatible) {
+    return NULL;
+  }
+
+  // list of the samples to be merged, headed by the current object
+
+  nb_total = nb_sample + 1;
+  psample = new const RenewalData*[nb_total];
+
+  psample[0] = this;
+  for (sample = 0;sample < nb_sample;sample++) {
+    psample[sample + 1] = itimev[sample];
+  }
+
+  merged = new RenewalData(nb_total , psample);
+
+  // copy of the sequences of events
+
+  out = 0;
+  for (sample = 0;sample < nb_total;sample++) {
+    for (seq = 0;seq < psample[sample]->nb_element;seq++ , out++) {
+      for (pos = 0;pos < psample[sample]->length[seq];pos++) {
+        merged->sequence[out][pos] = psample[sample]->sequence[seq][pos];
+      }
+    }
+  }
+
+  // merging of the frequency distributions, one family at a time
+
+  pdistrib = new const FrequencyDistribution*[nb_total];
+
+  if (inter_event) {
+    for (sample = 0;sample < nb_total;sample++) {
+      pdistrib[sample] = psample[sample]->inter_event;
+    }
+    merged->inter_event = new FrequencyDistribution(nb_total , pdistrib);
+  }
+
+  for (sample = 0;sample < nb_total;sample++) {
+    pdistrib[sample] = psample[sample]->within;
+  }
+  merged->within = new FrequencyDistribution(nb_total , pdistrib);
+
+  if (length_bias) {
+    for (sample = 0;sample < nb_total;sample++) {
+      pdistrib[sample] = psample[sample]->length_bias;
+    }
+    merged->length_bias = new FrequencyDistribution(nb_total , pdistrib);
+  }
+
+  for (sample = 0;sample < nb_total;sample++) {
+    pdistrib[sample] = psample[sample]->backward;
+  }
+  merged->backward = new FrequencyDistribution(nb_total , pdistrib);
+
+  for (sample = 0;sample < nb_total;sample++) {
+    pdistrib[sample] = psample[sample]->forward;
+  }
+  merged->forward = new FrequencyDistribution(nb_total , pdistrib);
+
+  merged->build_index_event(merged->type == ORDINARY ? 0 : 1);
+
+  delete [] pdistrib;
+  delete [] psample;
+
+  return merged;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Merging of RenewalData objects.
+ *
+ *  \param[in] error     reference on a StatError object,
+ *  \param[in] nb_sample number of RenewalData objects,
+ *  \param[in] itimev    RenewalData objects.
+ *
+ *  \return RenewalData object.
+ */
+/*--------------------------------------------------------------*/
+
+RenewalData* RenewalData::merge(StatError &error , int nb_sample ,
+                                const vector<RenewalData> &itimev) const
+
+{
+  int i;
+  RenewalData *timev;
+  const RenewalData **ptimev;
+
+
+  // the pointer-based merge only reads the samples: the elements of the vector
+  // are referenced directly instead of being deep-copied and deleted afterwards
+
+  ptimev = new const RenewalData*[nb_sample];
+  for (i = 0;i < nb_sample;i++) {
+    ptimev[i] = &(itimev[i]);
+  }
+
+  timev = merge(error , nb_sample , ptimev);
+
+  delete [] ptimev;
+
+  return timev;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Extraction of a frequency distribution.
+ *
+ *  The extracted frequency distribution is associated with the corresponding
+ *  distribution of the Renewal object when such an object is attached.
+ *  A frequency distribution that is absent (NULL) or empty is reported
+ *  through error and NULL is returned.
+ *
+ *  \param[in] error      reference on a StatError object,
+ *  \param[in] histo_type frequency distribution type,
+ *  \param[in] itime      observation period (used for NB_EVENT only).
+ *
+ *  \return DiscreteDistributionData object (NULL in case of error).
+ */
+/*--------------------------------------------------------------*/
+
+DiscreteDistributionData* RenewalData::extract(StatError &error , renewal_distribution histo_type ,
+                                               int itime) const
+
+{
+  bool status = true;
+  Distribution *pdist;
+  DiscreteParametric *pparam;
+  FrequencyDistribution *phisto;
+  DiscreteDistributionData *histo;
+
+
+  histo = NULL;
+  error.init();
+
+  if (histo_type == NB_EVENT) {
+
+    // the observation period must have been observed
+
+    if ((itime < htime->offset) || (itime >= htime->nb_value) || (htime->frequency[itime] == 0)) {
+      error.update(SEQ_error[SEQR_OBSERVATION_TIME]);
+    }
+    else {
+      histo = new DiscreteDistributionData(*hnb_event[itime] ,
+                                           (renewal ? renewal->nb_event[itime] : NULL));
+    }
+  }
+
+  else {
+
+    // selection of the frequency distribution; phisto stays NULL when
+    // the type is not handled or the frequency distribution is absent
+
+    phisto = NULL;
+
+    switch (histo_type) {
+
+    case INTER_EVENT : {
+      if (inter_event) {
+        phisto = inter_event;
+      }
+      else {
+        status = false;
+        ostringstream error_message;
+        error_message << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " "
+                      << STAT_error[STATR_NOT_PRESENT];
+        error.update((error_message.str()).c_str());
+      }
+      break;
+    }
+
+    case WITHIN_OBSERVATION_PERIOD : {
+      phisto = within;
+      break;
+    }
+
+    case LENGTH_BIAS : {
+      if (length_bias) {
+        phisto = length_bias;
+      }
+      else {
+        status = false;
+        ostringstream error_message;
+        error_message << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " "
+                      << STAT_error[STATR_NOT_PRESENT];
+        error.update((error_message.str()).c_str());
+      }
+      break;
+    }
+
+    case BACKWARD_RECURRENCE_TIME : {
+      phisto = backward;
+      break;
+    }
+
+    case FORWARD_RECURRENCE_TIME : {
+      phisto = forward;
+      break;
+    }
+
+    case NB_EVENT_MIXTURE : {
+      phisto = mixture;
+      break;
+    }
+
+    default : {
+      break;
+    }
+    }
+
+    // a missing frequency distribution is handled as an empty sample
+    // instead of being dereferenced
+
+    if ((status) && ((!phisto) || (phisto->nb_element == 0))) {
+      status = false;
+      error.update(STAT_error[STATR_EMPTY_SAMPLE]);
+    }
+
+    if (status) {
+      pdist = NULL;
+      pparam = NULL;
+
+      // associated theoretical distribution, if a model is attached
+
+      if (renewal) {
+        switch (histo_type) {
+        case INTER_EVENT :
+          pparam = renewal->inter_event;
+          break;
+        case WITHIN_OBSERVATION_PERIOD :
+          pparam = renewal->inter_event;
+          break;
+        case LENGTH_BIAS :
+          pdist = renewal->length_bias;
+          break;
+        case BACKWARD_RECURRENCE_TIME :
+          pdist = renewal->backward;
+          break;
+        case FORWARD_RECURRENCE_TIME :
+          pdist = renewal->forward;
+          break;
+        case NB_EVENT_MIXTURE :
+          pdist = renewal->mixture;
+          break;
+        default :
+          break;
+        }
+      }
+
+      if (pdist) {
+        histo = new DiscreteDistributionData(*phisto , pdist);
+      }
+      else {
+        histo = new DiscreteDistributionData(*phisto , pparam);
+      }
+    }
+  }
+
+  return histo;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Writing of a RenewalData object.
+ * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level, + * \param[in] file_flag flag file. + */ +/*--------------------------------------------------------------*/ + +ostream& RenewalData::ascii_write(ostream &os , bool exhaustive , bool file_flag) const + +{ + int i , j; + int nb_value , max , width[2]; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + // writing of the inter-event frequency distribution, + // the frequency distribution of time intervals between events within the observation period, + // the length-biased frequency distribution, + // the backward and forward recurrence time frequency distributions + + if (inter_event) { + os << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + inter_event->ascii_characteristic_print(os , false , file_flag); + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_VARIATION_COEFF] << ": " + << sqrt(inter_event->variance) / inter_event->mean << endl; + } + + if ((exhaustive) || (!inter_event)) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + within->ascii_characteristic_print(os , false , file_flag); + } + + if (exhaustive) { + if (length_bias) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + length_bias->ascii_characteristic_print(os , false , file_flag); + } + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + backward->ascii_characteristic_print(os , false , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + <<
STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - "; + forward->ascii_characteristic_print(os , false , file_flag); + + nb_value = within->nb_value; // number of table rows = largest support among the printed distributions + if ((length_bias) && (length_bias->nb_value > nb_value)) { + nb_value = length_bias->nb_value; // must take the length-biased support itself, otherwise its last rows are dropped + } + if (backward->nb_value > nb_value) { + nb_value = backward->nb_value; + } + if (forward->nb_value > nb_value) { + nb_value = forward->nb_value; + } + + width[0] = column_width(nb_value - 1); + + if (inter_event) { + max = inter_event->max; + } + else { + max = within->max; + } + if (backward->max > max) { + max = backward->max; + } + if (forward->max > max) { + max = forward->max; + } + width[1] = column_width(max) + ASCII_SPACE; + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " "; + if (inter_event) { + os << " | " << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << " | " << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + if (length_bias) { + os << " | " << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << " | " << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " | " << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + + for (i = 0;i < nb_value;i++) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << setw(width[0]) << i; + + if (inter_event) { + if (i < inter_event->nb_value) { + os << setw(width[1]) << inter_event->frequency[i]; + } + else { + os << setw(width[1]) << " "; + } + } + + if (i < within->nb_value) { + os << setw(width[1]) << within->frequency[i]; + } + else { + os << setw(width[1]) << " "; + } + + if (length_bias) { + if (i < length_bias->nb_value) { + os << setw(width[1]) << length_bias->frequency[i]; + } + else { + os << setw(width[1]) << " "; + } + } + + if (i < backward->nb_value) { + os <<
setw(width[1]) << backward->frequency[i]; + } + else { + os << setw(width[1]) << " "; + } + + if (i < forward->nb_value) { + os << setw(width[1]) << forward->frequency[i]; + } + else { + os << setw(width[1]) << " "; + } + } + os << endl; + } + + os << "\n"; + if (file_flag) { + ascii_file_write(os , exhaustive , type); + } + else { + TimeEvents::ascii_write(os , exhaustive , type); + } + + // writing of no-event/event probabilities as a function of time + + if (exhaustive) { + os << "\n"; + if (file_flag) { + os << "# "; + } + os << " | " << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY] + << " | " << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_EVENT_PROBABILITY] + << " | " << STAT_label[STATL_FREQUENCY] << endl; + + index_event->ascii_print(os , file_flag); + + // writing of the sequences of events + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_SEQUENCES] << endl; + + for (i = 0;i < nb_element;i++) { + os << "\n"; + if (file_flag) { + os << "# "; + } + + for (j = 0;j < length[i];j++) { + if ((j > 0) && ((2 * j) % LINE_NB_CHARACTER == 0)) { + os << "\\" << endl; + if (file_flag) { + os << "# "; + } + } + + os << sequence[i][j] << " "; + } + + os << endl; + } + } + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a RenewalData object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& RenewalData::ascii_write(ostream &os , bool exhaustive) const + +{ + if (renewal) { + renewal->ascii_write(os , this , exhaustive , false); + } + else { + ascii_write(os , exhaustive , false); + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a RenewalData object in a file.
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool RenewalData::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status = false; + + + ofstream out_file(path.c_str()); + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + if (renewal) { + renewal->ascii_write(out_file , this , exhaustive , true); + } + else { + ascii_write(out_file , exhaustive , true); + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a RenewalData object at the spreadsheet format. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& RenewalData::spreadsheet_write(ostream &os) const + +{ + int i; + int nb_value; + + + // writing of the inter-event frequency distribution, + // the frequency distribution of time intervals between events within the observation period, + // the length-biased frequency distribution, + // the backward and forward recurrence time frequency distributions + + if (inter_event) { + os << "\n" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + inter_event->spreadsheet_characteristic_print(os); + os << STAT_label[STATL_VARIATION_COEFF] << "\t" + << sqrt(inter_event->variance) / inter_event->mean << endl; + } + + os << "\n" << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + within->spreadsheet_characteristic_print(os); + + if (length_bias) { + os << "\n" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + length_bias->spreadsheet_characteristic_print(os); + } + os << "\n" <<
STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + backward->spreadsheet_characteristic_print(os); + + os << "\n" << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + forward->spreadsheet_characteristic_print(os); + + nb_value = within->nb_value; // number of table rows = largest support among the printed distributions + if ((length_bias) && (length_bias->nb_value > nb_value)) { + nb_value = length_bias->nb_value; // must take the length-biased support itself, otherwise its last rows are dropped + } + if (backward->nb_value > nb_value) { + nb_value = backward->nb_value; + } + if (forward->nb_value > nb_value) { + nb_value = forward->nb_value; + } + + os << "\n"; + if (inter_event) { + os << "\t" << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << "\t" << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + if (length_bias) { + os << " \t" << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + os << "\t" << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] + << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t" << STAT_label[STATL_FORWARD] + << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + + for (i = 0;i < nb_value;i++) { + os << "\n" << i; + + if (inter_event) { + os << "\t"; + if (i < inter_event->nb_value) { + os << inter_event->frequency[i]; + } + } + + os << "\t"; + if (i < within->nb_value) { + os << within->frequency[i]; + } + + if (length_bias) { + os << "\t"; + if (i < length_bias->nb_value) { + os << length_bias->frequency[i]; + } + } + + os << "\t"; + if (i < backward->nb_value) { + os << backward->frequency[i]; + } + os << "\t"; + if (i < forward->nb_value) { + os << forward->frequency[i]; + } + } + os << endl; + + os << "\n"; + TimeEvents::spreadsheet_write(os); + + // writing of no-event/event probabilities as a function of time + + os << "\n\t" <<
SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY] + << "\t" << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_EVENT_PROBABILITY] + << "\t" << STAT_label[STATL_FREQUENCY] << endl; + + index_event->spreadsheet_print(os); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a RenewalData object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool RenewalData::spreadsheet_write(StatError &error , const string path) const + +{ + bool status = false; + + + ofstream out_file(path.c_str()); + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + + if (renewal) { + renewal->spreadsheet_write(out_file , this); + } + else { + spreadsheet_write(out_file); + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a RenewalData object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status.
+ */ +/*--------------------------------------------------------------*/ + +bool RenewalData::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status = false; + + + error.init(); + + if (renewal) { + status = renewal->plot_write(prefix , title , this); + } + + else { + int i , j , k; + int nb_histo; + const FrequencyDistribution **phisto; + ostringstream data_file_name[2]; + + + // writing of the data files + + data_file_name[0] << prefix << 0 << ".dat"; + + nb_histo = 2; + if (inter_event) { + nb_histo++; + } + if (within->nb_element > 0) { + nb_histo++; + } + if (length_bias) { + nb_histo++; + } + + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i] > 0) { + nb_histo++; + } + } + if (htime->variance > 0.) { + nb_histo += 2; + } + + phisto = new const FrequencyDistribution*[nb_histo]; + + nb_histo = 0; + if (inter_event) { + phisto[nb_histo++] = inter_event; + } + if (within->nb_element > 0) { + phisto[nb_histo++] = within; + } + if (length_bias) { + phisto[nb_histo++] = length_bias; + } + phisto[nb_histo++] = backward; + phisto[nb_histo++] = forward; + + if (htime->variance > 0.) { + phisto[nb_histo++] = htime; + } + for (i = htime->offset;i < htime->nb_value;i++) { + if (htime->frequency[i] > 0) { + phisto[nb_histo++] = hnb_event[i]; + } + } + if (htime->variance > 0.) 
{ + phisto[nb_histo++] = mixture; + } + + status = phisto[0]->plot_print((data_file_name[0].str()).c_str() , nb_histo - 1 , phisto + 1); + + delete [] phisto; + + if (status) { + data_file_name[1] << prefix << 1 << ".dat"; + index_event->plot_print((data_file_name[1].str()).c_str()); + + // writing of the script files + + for (i = 0;i < 2;i++) { + j = 1; + + ostringstream file_name[2]; + + switch (i) { + case 0 : + file_name[0] << prefix << ".plot"; + break; + case 1 : + file_name[0] << prefix << ".print"; + break; + } + + ofstream out_file((file_name[0].str()).c_str()); + + if (i == 1) { + out_file << "set terminal postscript" << endl; + file_name[1] << label(prefix) << ".ps"; + out_file << "set output \"" << file_name[1].str() << "\"\n\n"; + } + + out_file << "set border 15 lw 0\n" << "set tics out\n" << "set xtics nomirror\n" + << "set title"; + if (title) { + out_file << " \"" << title << "\""; + } + out_file << "\n\n"; + + if (inter_event) { + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(inter_event->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << inter_event->nb_value - 1 << "] [0:" + << (int)(inter_event->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_INTER_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(inter_event->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (within->nb_element > 0) { + if (within->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(within->max * YSCALE) + 1 < TIC_THRESHOLD) { + 
out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << within->nb_value - 1 << "] [0:" + << (int)(within->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (within->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(within->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (length_bias) { + if (length_bias->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(length_bias->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << length_bias->nb_value - 1 << "] [0:" + << (int)(length_bias->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_LENGTH_BIASED] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (length_bias->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(length_bias->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + } + + if (backward->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(backward->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << backward->nb_value - 1 << "] [0:" + << (int)(backward->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[STATL_BACKWARD] << " " 
+ << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (backward->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(backward->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (forward->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(forward->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << forward->nb_value - 1 << "] [0:" + << (int)(forward->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << STAT_label[STATL_FORWARD] << " " + << SEQ_label[SEQL_RECURRENCE_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] + << "\" with impulses" << endl; + + if (forward->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(forward->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (htime->variance > 0.) 
{ + if (htime->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(htime->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << htime->nb_value - 1 << "] [0:" + << (int)(htime->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_OBSERVATION_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (htime->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(htime->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + for (k = htime->offset;k < htime->nb_value;k++) { + if (htime->frequency[k] > 0) { + if (((htime->variance > 0.) || (k > htime->offset)) && (i == 0)) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (hnb_event[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(hnb_event[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << hnb_event[k]->nb_value - 1 << "] [0:" + << (int)(hnb_event[k]->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j++ + << " title \"" << SEQ_label[SEQL_NB_EVENT] << " " + << SEQ_label[SEQL_DURING] << " " << k << " " << SEQ_label[SEQL_TIME_UNIT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (hnb_event[k]->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(hnb_event[k]->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + } + + if (htime->variance > 0.) 
{ + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (mixture->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + if ((int)(mixture->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics 0,1" << endl; + } + + out_file << "plot [0:" << mixture->nb_value - 1 << "] [0:" + << (int)(mixture->max * YSCALE) + 1 << "] \"" + << label((data_file_name[0].str()).c_str()) << "\" using " << j + << " title \"" << SEQ_label[SEQL_NB_EVENT] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\" with impulses" << endl; + + if (mixture->nb_value - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + if ((int)(mixture->max * YSCALE) + 1 < TIC_THRESHOLD) { + out_file << "set ytics autofreq" << endl; + } + } + + if (i == 0) { + out_file << "\npause -1 \"" << STAT_label[STATL_HIT_RETURN] << "\"" << endl; + } + out_file << endl; + + if (index_event->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics 0,1" << endl; + } + + out_file << "plot [" << index_event->offset << ":" << index_event->length - 1 << "] [0:1] " + << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 1 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << SEQ_label[SEQL_NO_EVENT_PROBABILITY] << " \" with linespoints,\\" << endl; + out_file << "\"" << label((data_file_name[1].str()).c_str()) << "\" using " + << 2 << " title \"" << SEQ_label[SEQL_OBSERVED] << " " + << SEQ_label[SEQL_EVENT_PROBABILITY] << " \" with linespoints" << endl; + + if (index_event->length - 1 < TIC_THRESHOLD) { + out_file << "set xtics autofreq" << endl; + } + + if (i == 1) { + out_file << "\nset terminal x11" << endl; + } + + out_file << "\npause 0 \"" << STAT_label[STATL_END] << "\"" << endl; + } + } + } + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a 
RenewalData object. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* RenewalData::get_plotable() const + +{ + MultiPlotSet *plot_set; + + + if (renewal) { + plot_set = renewal->get_plotable(this); + } + + else { + int i , j , k , m; + int nb_plot_set , nb_histo , max_nb_value , max_frequency; + double shift; + const FrequencyDistribution *phisto[2] , **merged_histo; + ostringstream title , legend; + + + nb_plot_set = 3; + if (inter_event) { + nb_plot_set++; + } + if ((within->nb_element > 0) || (length_bias)) { + nb_plot_set++; + } + if (htime->variance > 0.) { + nb_plot_set += 2; + } + + plot_set = new MultiPlotSet(nb_plot_set); + MultiPlotSet &plot = *plot_set; + + plot.border = "15 lw 0"; + + i = 0; + if (inter_event) { + + // inter-event frequency distribution + + plot[i].xrange = Range(0 , inter_event->nb_value - 1); + plot[i].yrange = Range(0 , ceil(inter_event->max * YSCALE)); + + if (inter_event->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(inter_event->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(within->nb_element > 0 ?
2 : 1); + + legend.str(""); + legend << SEQ_label[SEQL_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; // this curve draws inter_event, so it carries the inter-event label + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + inter_event->plotable_frequency_write(plot[i][0]); + + if (within->nb_element > 0) { + legend.str(""); + legend << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "impulses"; + + within->plotable_frequency_write(plot[i][1]); + } + + i++; + } + + if ((within->nb_element > 0) || (length_bias)) { + + // frequency distribution of time intervals between events within the observation period and + // length-biased frequency distribution + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + if (within->nb_element > 0) { + nb_histo++; + + if (within->nb_value > max_nb_value) { + max_nb_value = within->nb_value; + } + if (within->max > max_frequency) { + max_frequency = within->max; + } + } + + if (length_bias) { + nb_histo++; + + if (length_bias->nb_value > max_nb_value) { + max_nb_value = length_bias->nb_value; + } + if (length_bias->max > max_frequency) { + max_frequency = length_bias->max; + } + } + + plot[i].xrange = Range(0 , max_nb_value); + plot[i].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(nb_histo); + + j = 0; + if (within->nb_element > 0) { + legend.str(""); + legend << STAT_label[STATL_OBSERVATION_INTER_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "impulses"; + + within->plotable_frequency_write(plot[i][j]); + j++; + } + + if (length_bias) { + legend.str(""); + legend << SEQ_label[SEQL_LENGTH_BIASED] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][j].legend = legend.str(); + + plot[i][j].style =
"impulses"; + + for (k = length_bias->offset;k < length_bias->nb_value;k++) { + if (length_bias->frequency[k] > 0) { + plot[i][j].add_point(k + j * PLOT_SHIFT , length_bias->frequency[k]); + } + } + } + + i++; + } + + // backward and forward recurrence time frequency distributions + + max_nb_value = MAX(backward->nb_value , forward->nb_value); + max_frequency = MAX(backward->max , forward->max); + + plot[i].xrange = Range(0 , max_nb_value); + plot[i].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(2); + + legend.str(""); + legend << STAT_label[STATL_BACKWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + backward->plotable_frequency_write(plot[i][0]); + + legend.str(""); + legend << STAT_label[STATL_FORWARD] << " " << SEQ_label[SEQL_RECURRENCE_TIME] << " " + << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "impulses"; + + for (j = forward->offset;j < forward->nb_value;j++) { + if (forward->frequency[j] > 0) { + plot[i][1].add_point(j + PLOT_SHIFT , forward->frequency[j]); + } + } + i++; + + if (htime->variance > 0.)
{ + + // observation period frequency distribution + + plot[i].xrange = Range(0 , htime->nb_value - 1); + plot[i].yrange = Range(0 , ceil(htime->max * YSCALE)); + + if (htime->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(htime->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_OBSERVATION_TIME] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + htime->plotable_frequency_write(plot[i][0]); + i++; + } + + // number of events frequency distribution for each observation period + + if (htime->variance > 0.) { + title.str(""); + title << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[i].title = title.str(); + } + + // computation of the maximum number of values and the maximum frequency + + nb_histo = 0; + max_nb_value = 0; + max_frequency = 0; + + for (j = htime->offset;j < htime->nb_value;j++) { + if (htime->frequency[j] > 0) { + nb_histo++; + + if (hnb_event[j]->nb_value > max_nb_value) { + max_nb_value = hnb_event[j]->nb_value; + } + if (hnb_event[j]->max > max_frequency) { + max_frequency = hnb_event[j]->max; + } + } + } + + plot[i].xrange = Range(0 , max_nb_value); + plot[i].yrange = Range(0 , ceil(max_frequency * YSCALE)); + + if (max_nb_value < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(max_frequency * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(nb_histo); + + j = 0; + shift = 0.; + + for (k = htime->offset;k < htime->nb_value;k++) { + if (htime->frequency[k] > 0) { + legend.str(""); + if (htime->variance > 0.)
{ + legend << k << " " << SEQ_label[SEQL_TIME_UNIT]; + } + else { + legend << SEQ_label[SEQL_NB_EVENT] << " " << SEQ_label[SEQL_DURING] << " " << k << " " + << SEQ_label[SEQL_TIME_UNIT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + } + plot[i][j].legend = legend.str(); + + plot[i][j].style = "impulses"; + + for (m = hnb_event[k]->offset;m < hnb_event[k]->nb_value;m++) { + if (hnb_event[k]->frequency[m] > 0) { + plot[i][j].add_point(m + shift , hnb_event[k]->frequency[m]); + } + } + + if (PLOT_SHIFT * (nb_histo - 1) < PLOT_MAX_SHIFT) { + shift += PLOT_SHIFT; + } + else { + shift += PLOT_MAX_SHIFT / (nb_histo - 1); + } + + j++; + } + } + i++; + + if (htime->variance > 0.) { + + // superimposed number of events frequency distributions + + merged_histo = new const FrequencyDistribution*[nb_histo]; + + j = nb_histo - 1; + for (k = htime->nb_value - 1;k >= htime->offset;k--) { + if (htime->frequency[k] > 0) { + if (j == nb_histo - 1) { + merged_histo[j] = new FrequencyDistribution(*hnb_event[k]); + } + + else { + phisto[0] = merged_histo[j + 1]; + phisto[1] = hnb_event[k]; + merged_histo[j] = new FrequencyDistribution(2 , phisto); + } + + j--; + } + } + + title.str(""); + title << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTIONS]; + plot[i].title = title.str(); + + plot[i].xrange = Range(0 , merged_histo[0]->nb_value - 1); + plot[i].yrange = Range(0 , ceil(merged_histo[0]->max * YSCALE)); + + if (merged_histo[0]->nb_value - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + if (ceil(merged_histo[0]->max * YSCALE) < TIC_THRESHOLD) { + plot[i].ytics = 1; + } + + plot[i].resize(nb_histo); + + j = 0; + for (k = htime->offset;k < htime->nb_value;k++) { + if (htime->frequency[k] > 0) { + legend.str(""); + legend << k << " " << SEQ_label[SEQL_TIME_UNIT]; + plot[i][j].legend = legend.str(); + + plot[i][j].style = "impulses"; + + merged_histo[j]->plotable_frequency_write(plot[i][j]); + j++; + } + } + + for (j = 0;j < nb_histo;j++) { + delete
merged_histo[j]; + } + delete [] merged_histo; + +/* plot[i].resize(1); + + legend.str(""); + legend << SEQ_label[SEQL_NB_EVENT] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "impulses"; + + mixture->plotable_frequency_write(plot[i][0]); */ + + i++; + } + + // no-event/event probabilities as a function of time + + plot[i].xrange = Range(0 , index_event->length - 1); + plot[i].yrange = Range(0. , 1.); + + if (index_event->length - 1 < TIC_THRESHOLD) { + plot[i].xtics = 1; + } + + plot[i].resize(2); + + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_NO_EVENT_PROBABILITY]; + plot[i][0].legend = legend.str(); + + plot[i][0].style = "linespoints"; + + index_event->plotable_write(0 , plot[i][0]); + + legend.str(""); + legend << SEQ_label[SEQL_OBSERVED] << " " << SEQ_label[SEQL_EVENT_PROBABILITY]; + plot[i][1].legend = legend.str(); + + plot[i][1].style = "linespoints"; + + index_event->plotable_write(1 , plot[i][1]); + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of no-event/event probabilities as a function of time. + * + * \param[in] offset shift (0/1).
+ */ +/*--------------------------------------------------------------*/ + +void RenewalData::build_index_event(int offset) + +{ + int i , j; + int frequency[2]; + + + index_event = new Curves(2 , htime->nb_value , true , false , false); + index_event->offset = offset; + + for (i = index_event->offset;i < index_event->length;i++) { + frequency[0] = 0; // number of sequences with no event at this index + frequency[1] = 0; // number of sequences with an event at this index + + for (j = 0;j < nb_element;j++) { + if (i - index_event->offset < length[j]) { + frequency[sequence[j][i - index_event->offset]]++; + } + } + + index_event->frequency[i] = frequency[0] + frequency[1]; + index_event->point[0][i] = (index_event->frequency[i] > 0 ? (double)frequency[0] / (double)index_event->frequency[i] : 0.); // no sequence reaches this index: store 0 rather than 0/0 (NaN) + index_event->point[1][i] = (index_event->frequency[i] > 0 ? (double)frequency[1] / (double)index_event->frequency[i] : 0.); // same guard for the event probability + } +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/variable_order_markov.cpp b/src/cpp/sequence_analysis/variable_order_markov.cpp new file mode 100644 index 0000000..4428775 --- /dev/null +++ b/src/cpp/sequence_analysis/variable_order_markov.cpp @@ -0,0 +1,6274 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "stat_tool/stat_label.h" + +#include "variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the VariableOrderMarkovChain class. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain::VariableOrderMarkovChain() + +{ + max_order = 0; + + memo_type = NULL; + order = NULL; + state = NULL; + + parent = NULL; + child = NULL; + + next = NULL; + nb_memory = NULL; + previous = NULL; + + state_process = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkovChain class. + * + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] inb_state number of states, + * \param[in] inb_row number of memories. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain::VariableOrderMarkovChain(process_type itype , int inb_state , int inb_row) +:Chain(itype , inb_state , inb_row , true) + +{ + int i; + + max_order = 0; + + memo_type = new memory_type[nb_row]; + order = new int[nb_row]; + state = new int*[nb_row]; + parent = new int[nb_row]; + child = new int*[nb_row]; + + for (i = 0;i < nb_row;i++) { + state[i] = NULL; + child[i] = NULL; + } + + next = NULL; + nb_memory = NULL; + previous = NULL; + + state_process = new CategoricalSequenceProcess(nb_state , nb_state); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkovChain class. + * + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] inb_state number of states, + * \param[in] inb_row number of memories, + * \param[in] imax_order maximum order. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain::VariableOrderMarkovChain(process_type itype , int inb_state , + int inb_row , int imax_order) +:Chain(itype , inb_state , inb_row , true) + +{ + int i; + + + max_order = imax_order; + + memo_type = new memory_type[nb_row]; + order = new int[nb_row]; + state = new int*[nb_row]; + parent = new int[nb_row]; + child = new int*[nb_row]; + + for (i = 0;i < nb_row;i++) { + state[i] = new int[max_order]; + child[i] = NULL; + } + + next = NULL; + nb_memory = NULL; + previous = NULL; + + state_process = new CategoricalSequenceProcess(nb_state , nb_state); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a VariableOrderMarkovChain object of fixed order. + * + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] inb_state number of states, + * \param[in] iorder order, + * \param[in] init_flag flag initialization. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain::VariableOrderMarkovChain(process_type itype , int inb_state , + int iorder , bool init_flag) +:Chain(itype , inb_state , (int)(pow((double)inb_state , iorder + 1) - 1) / (inb_state - 1) , init_flag) + +{ + int i , j; + + max_order = iorder; + + memo_type = new memory_type[nb_row]; + order = new int[nb_row]; + state = new int*[nb_row]; + parent = new int[nb_row]; + child = new int*[nb_row]; + + // root (zero order) + + memo_type[0] = NON_TERMINAL; + order[0] = 0; + state[0] = NULL; + parent[0] = -1; + child[0] = new int[nb_state]; + + for (i = 1;i < nb_row;i++) { + + // case increase of the order + + if (order[i - 1] < max_order) { + order[i] = order[i - 1] + 1; + if (order[i] < max_order) { + memo_type[i] = NON_TERMINAL; + } + else { + memo_type[i] = TERMINAL; + } + + state[i] = new int[order[i]]; + for (j = 0;j < order[i - 1];j++) { + state[i][j] = state[i - 1][j]; + } + state[i][order[i] - 1] = 0; + + parent[i] = i - 1; + child[i - 1][0] = i; + } + + else { + + // case stable (maximum) order + + if (state[i - 1][order[i - 1] - 1] < nb_state - 1) { + memo_type[i] = TERMINAL; + order[i] = max_order; + state[i] = new int[order[i]]; + for (j = 0;j < order[i] - 1;j++) { + state[i][j] = state[i - 1][j]; + } + state[i][order[i] - 1] = state[i - 1][order[i] - 1] + 1; + + parent[i] = i - state[i][order[i] - 1] - 1; + child[parent[i]][state[i][order[i] - 1]] = i; + } + + // case decrease of the order + + else { + memo_type[i] = NON_TERMINAL; + + for (j = order[i - 1] - 2;j >= 0;j--) { + if (state[i - 1][j] != nb_state - 1) { + break; + } + } + order[i] = j + 1; + + state[i] = new int[order[i]]; + for (j = 0;j < order[i] - 1;j++) { + state[i][j] = state[i - 1][j]; + } + state[i][order[i] - 1] = state[i - 1][order[i] - 1] + 1; + + // search for the parent vertex + + find_parent_memory(i); + } + } + + if (memo_type[i] == NON_TERMINAL) { + child[i] = new int[nb_state]; + } + 
else { + child[i] = NULL; + } + } + + // computation of the transitions between terminal memories + + next = NULL; + nb_memory = NULL; + previous = NULL; + + build_memory_transition(); + + state_process = new CategoricalSequenceProcess(nb_state , nb_state , false); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Completion of the memory tree. + * + * \param[in] markov reference on a VariableOrderMarkovChain object. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::memory_tree_completion(const VariableOrderMarkovChain &markov) + +{ + bool prefix; + int i , j , k , m; + int bnb_memory , border , *markov_next , *completion_next; + VariableOrderMarkovChain *completion; + + + bnb_memory = markov.nb_row; + for (i = 1;i < markov.nb_row;i++) { + if (markov.order[i] == markov.max_order) { + bnb_memory--; + } + } + + completion = new VariableOrderMarkovChain(markov.type , markov.nb_state , + (int)(pow((double)markov.nb_state , markov.max_order) - 1) / (markov.nb_state - 1) - bnb_memory); + completion->nb_row = 0; + + for (i = 1;i < markov.nb_row;i++) { + if (markov.order[i] > 1) { + + // search for the vertex corresponding to the longer proper prefix + + prefix = false; + + for (j = 0;j < markov.nb_row;j++) { + if (markov.order[j] == markov.order[i] - 1) { + for (k = 0;k < markov.order[j];k++) { + if (markov.state[j][k] != markov.state[i][k + 1]) { + break; + } + } + + if (k == markov.order[j]) { + prefix = true; + break; + } + } + } + + if (!prefix) { + for (j = 0;j < completion->nb_row;j++) { + if (completion->order[j] == markov.order[i] - 1) { + for (k = 0;k < completion->order[j];k++) { + if (completion->state[j][k] != markov.state[i][k + 1]) { + break; + } + } + + if (k == completion->order[j]) { + prefix = true; + break; + } + } + } + + if (!prefix) { + + // construction of the vertex corresponding to the longer proper prefix + + completion->order[completion->nb_row] = 
markov.order[i] - 1; + completion->state[completion->nb_row] = new int[completion->order[completion->nb_row]]; + for (j = 0;j < completion->order[completion->nb_row];j++) { + completion->state[completion->nb_row][j] = markov.state[i][j + 1]; + } + for (j = 0;j < markov.nb_state;j++) { + completion->transition[completion->nb_row][j] = markov.transition[i][j]; + } + (completion->nb_row)++; + + for (j = completion->order[completion->nb_row - 1] - 1;j >= 2;j--) { + prefix = false; + + for (k = 0;k < markov.nb_row;k++) { + if (markov.order[k] == j) { + for (m = 0;m < markov.order[k];m++) { + if (markov.state[k][m] != completion->state[completion->nb_row - 1][m + 1]) { + break; + } + } + + if (m == markov.order[k]) { + prefix = true; + break; + } + } + } + + if (!prefix) { + for (k = 0;k < completion->nb_row - 1;k++) { + if (completion->order[k] == j) { + for (m = 0;m < completion->order[k];m++) { + if (completion->state[k][m] != completion->state[completion->nb_row - 1][m + 1]) { + break; + } + } + + if (m == completion->order[k]) { + prefix = true; + break; + } + } + } + + if (!prefix) { + + // construction of the vertex corresponding of the longer proper prefix + + completion->order[completion->nb_row] = j; + completion->state[completion->nb_row] = new int[completion->order[completion->nb_row]]; + for (k = 0;k < completion->order[completion->nb_row];k++) { + completion->state[completion->nb_row][k] = completion->state[completion->nb_row - 1][k + 1]; + } + for (k = 0;k < markov.nb_state;k++) { + completion->transition[completion->nb_row][k] = completion->transition[completion->nb_row - 1][k]; + } + (completion->nb_row)++; + } + + else { + break; + } + } + } + } + } + } + } + +# ifdef DEBUG + { + cout << "\n"; + for (i = 0;i < completion->nb_row;i++) { + cout << completion->order[i] << " | "; + for (j = completion->order[i] - 1;j >= 0;j--) { + cout << completion->state[i][j] << " "; + } + cout << endl; + } + } +# endif + + // search for the following vertex in the 
ordering of the completed memory tree + + markov_next = new int[markov.nb_row]; + completion_next = new int[completion->nb_row]; + + for (i = 0;i < markov.nb_row;i++) { + markov_next[i] = I_DEFAULT; + + if ((markov.memo_type[i] == TERMINAL) && (markov.order[i] < markov.max_order - 1)) { + + // search for the 1st child (state 0) of the terminal vertex + + for (j = 0;j < completion->nb_row;j++) { + if (completion->order[j] == markov.order[i] + 1) { + for (k = 0;k < markov.order[i];k++) { + if (completion->state[j][k] != markov.state[i][k]) { + break; + } + } + + if ((k == markov.order[i]) && (completion->state[j][completion->order[j] - 1] == 0)) { + markov_next[i] = j; + break; + } + } + } + } + } + + for (i = 0;i < completion->nb_row;i++) { + completion_next[i] = I_DEFAULT; + + // search for the 1st child (state 0) of the built vertex + + if (completion->order[i] < markov.max_order - 1) { + for (j = 0;j < completion->nb_row;j++) { + if (completion->order[j] == completion->order[i] + 1) { + for (k = 0;k < completion->order[i];k++) { + if (completion->state[j][k] != completion->state[i][k]) { + break; + } + } + + if ((k == completion->order[i]) && (completion->state[j][completion->order[j] - 1] == 0)) { + completion_next[i] = j; + break; + } + } + } + } + + if ((completion->order[i] == markov.max_order - 1) || (j == completion->nb_row)) { + if (completion->state[i][completion->order[i] - 1] < markov.nb_state - 1) { + + // search for siblings (following states) of the built vertex + + for (j = 0;j < completion->nb_row;j++) { + if (completion->order[j] == completion->order[i]) { + for (k = 0;k < completion->order[i] - 1;k++) { + if (completion->state[j][k] != completion->state[i][k]) { + break; + } + } + + if ((k == completion->order[i] - 1) && + (completion->state[j][completion->order[j] - 1] == completion->state[i][completion->order[i] - 1] + 1)) { + completion_next[i] = j; + break; + } + } + } + } + + else { + for (j = completion->order[i] - 2;j >= 0;j--) { + if 
(completion->state[i][j] != markov.nb_state - 1) { + break; + } + } + + border = j + 1; + for (j = 0;j < completion->nb_row;j++) { + if (completion->order[j] == border) { + for (k = 0;k < border - 1;k++) { + if (completion->state[j][k] != completion->state[i][k]) { + break; + } + } + + if ((k == border - 1) && + (completion->state[j][border - 1] == completion->state[i][border - 1] + 1)) { + completion_next[i] = j; + break; + } + } + } + } + } + } + +# ifdef DEBUG + { + cout << "\n"; + for (i = 0;i < markov.nb_row;i++) { + for (j = markov.order[i] - 1;j >= 0;j--) { + cout << markov.state[i][j] << " "; + } + if (markov_next[i] != I_DEFAULT) { + cout << " -> "; + for (j = completion->order[markov_next[i]] - 1;j >= 0;j--) { + cout << completion->state[markov_next[i]][j] << " "; + } + } + cout << endl; + } + + cout << "\n"; + for (i = 0;i < completion->nb_row;i++) { + for (j = completion->order[i] - 1;j >= 0;j--) { + cout << completion->state[i][j] << " "; + } + if (completion_next[i] != I_DEFAULT) { + cout << " -> "; + for (j = completion->order[completion_next[i]] - 1;j >= 0;j--) { + cout << completion->state[completion_next[i]][j] << " "; + } + } + cout << endl; + } + } +# endif + + // copy of parameters + + type = markov.type; + nb_state = markov.nb_state; + nb_row = markov.nb_row + completion->nb_row; + + if (markov.nb_component > 0) { + accessibility = new bool*[nb_state]; + for (i = 0;i < nb_state;i++) { + accessibility[i] = new bool[nb_state]; + for (j = 0;j < nb_state;j++) { + accessibility[i][j] = markov.accessibility[i][j]; + } + } + + nb_component = markov.nb_component; + component_nb_state = new int[nb_component]; + component = new int*[nb_component]; + + for (i = 0;i < nb_component;i++) { + component_nb_state[i] = markov.component_nb_state[i]; + component[i] = new int[component_nb_state[i]]; + for (j = 0;j < component_nb_state[i];j++) { + component[i][j] = markov.component[i][j]; + } + } + + stype = new state_type[nb_state]; + for (i = 0;i < nb_state;i++) 
{ + stype[i] = markov.stype[i]; + } + } + + else { + accessibility = NULL; + nb_component = 0; + component_nb_state = NULL; + component = NULL; + stype = NULL; + } + + initial = new double[type == ORDINARY ? nb_state : nb_row]; + + if (type == ORDINARY) { + for (i = 0;i < nb_state;i++) { + initial[i] = markov.initial[i]; + } + } + + transition = new double*[nb_row]; + for (i = 0;i < nb_row;i++) { + transition[i] = new double[nb_state]; + } + + cumul_initial = NULL; + cumul_transition = NULL; + + max_order = markov.max_order; + + memo_type = new memory_type[nb_row]; + order = new int[nb_row]; + state = new int*[nb_row]; + parent = new int[nb_row]; + child = new int*[nb_row]; + + next = NULL; + nb_memory = NULL; + previous = NULL; + + // insertion of the memories in the out-tree + + i = 0; + for (j = 0;j < markov.nb_row;j++) { + for (k = 0;k < nb_state;k++) { + transition[i][k] = markov.transition[j][k]; + } + + memo_type[i] = markov.memo_type[j]; + + order[i] = markov.order[j]; + state[i] = new int[order[i]]; + for (k = 0;k < order[i];k++) { + state[i][k] = markov.state[j][k]; + } + + parent[i] = markov.parent[j]; + + if ((markov.memo_type[j] == NON_TERMINAL) || (markov_next[j] != I_DEFAULT)) { + child[i] = new int[nb_state]; + + if (markov.memo_type[j] == NON_TERMINAL) { + for (k = 0;k < nb_state;k++) { + child[i][k] = markov.child[j][k]; + } + } + } + + else { + child[i] = NULL; + } + + i++; + + if (markov_next[j] != I_DEFAULT) { + k = markov_next[j]; + bnb_memory = i; + + do { + memo_type[i] = COMPLETION; + + order[i] = completion->order[k]; + state[i] = new int[order[i]]; + for (m = 0;m < order[i];m++) { + state[i][m] = completion->state[k][m]; + } + + find_parent_memory(i); + + for (m = 0;m < nb_state;m++) { + transition[i][m] = transition[parent[i]][m]; + } + + if ((completion_next[k] != I_DEFAULT) && + (completion->order[completion_next[k]] == completion->order[k] + 1)) { + child[i] = new int[nb_state]; + } + else { + child[i] = NULL; + } + + i++; + k = 
completion_next[k]; + } + while (k != I_DEFAULT); + + // update of the relationships parent/children + + for (k = 0;k < bnb_memory;k++) { + if (memo_type[k] == NON_TERMINAL) { + for (m = 0;m < nb_state;m++) { + if (child[k][m] >= bnb_memory) { + child[k][m] += i - bnb_memory; + } + } + } + } + + for (k = j + 1;k < markov.nb_row;k++) { + if (markov.parent[k] >= bnb_memory) { + markov.parent[k] += i - bnb_memory; + } + if (markov.memo_type[k] == NON_TERMINAL) { + for (m = 0;m < nb_state;m++) { + if (markov.child[k][m] >= bnb_memory) { + markov.child[k][m] += i - bnb_memory; + } + } + } + } + } + } + +# ifdef DEBUG + { + cout << "\n"; + cout << "Suffix free? " << (check_free_suffix() ? "True" : "False") << endl; + + for (i = 0;i < nb_row;i++) { + cout << i << " "; + for (j = max_order - 1;j >= order[i];j--) { + cout << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + cout << state[i][j] << " "; + } + + switch (memo_type[i]) { + case NON_TERMINAL : + cout << " " << SEQ_label[SEQL_NON_TERMINAL]; + break; + case TERMINAL : + cout << " " << SEQ_label[SEQL_TERMINAL]; + break; + case COMPLETION : + cout << " " << SEQ_label[SEQL_COMPLETION]; + break; + } + + cout << " | " << parent[i]; + + if (child[i]) { + cout << " |"; + for (j = 0;j < nb_state;j++) { + cout << " " << child[i][j]; + } + } + + else { + cout << " "; + for (j = 0;j < nb_state;j++) { + cout << " "; + } + } + + cout << endl; + } + } +# endif + + delete completion; + + delete [] markov_next; + delete [] completion_next; + + build_memory_transition(); + build_previous_memory(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a VariableOrderMarkovChain object with completion of + * the memory tree, computation of the transition distributions for the non-terminal memories + * (ordinary process) or computation of the stationary distribution (equilibrium process). + * + * \param[in] markov reference on a VariableOrderMarkovChain object. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::build(const VariableOrderMarkovChain &markov) + +{ + int i; + int nb_terminal; + + + memory_tree_completion(markov); + + switch (type) { + + case ORDINARY : { + non_terminal_transition_probability_computation(); + break; + } + + case EQUILIBRIUM : { + nb_terminal = (nb_row - 1) * (nb_state - 1) / nb_state + 1; + + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + initial[i] = 1. / (double)nb_terminal; + } + else { + initial[i] = 0.; + } + } + + initial_probability_computation(); + break; + } + } + + state_process = new CategoricalSequenceProcess(nb_state , nb_state); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a VariableOrderMarkovChain object. + * + * \param[in] markov reference on a VariableOrderMarkovChain object. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::copy(const VariableOrderMarkovChain &markov) + +{ + int i , j; + + + memo_type = new memory_type[nb_row]; + for (i = 0;i < nb_row;i++) { + memo_type[i] = markov.memo_type[i]; + } + + order = new int[nb_row]; + for (i = 0;i < nb_row;i++) { + order[i] = markov.order[i]; + } + + max_order = markov.max_order; + + state = new int*[nb_row]; + for (i = 0;i < nb_row;i++) { + state[i] = new int[order[i]]; + for (j = 0;j < order[i];j++) { + state[i][j] = markov.state[i][j]; + } + } + + parent = new int[nb_row]; + for (i = 0;i < nb_row;i++) { + parent[i] = markov.parent[i]; + } + + child = new int*[nb_row]; + for (i = 0;i < nb_row;i++) { + if (markov.child[i]) { + child[i] = new int[nb_state]; + for (j = 0;j < nb_state;j++) { + child[i][j] = markov.child[i][j]; + } + } + else { + child[i] = NULL; + } + } + + if (markov.next) { + next = new int*[nb_row]; + for (i = 0;i < nb_row;i++) { + if (markov.next[i]) { + next[i] = new int[nb_state]; + for (j = 0;j < nb_state;j++) { + next[i][j] = 
markov.next[i][j]; + } + } + else { + next[i] = NULL; + } + } + } + else { + next = NULL; + } + + if (markov.nb_memory) { + nb_memory = new int[nb_row]; + for (i = 0;i < nb_row;i++) { + nb_memory[i] = markov.nb_memory[i]; + } + } + else { + nb_memory = NULL; + } + + if (markov.previous) { + previous = new int*[nb_row]; + for (i = 0;i < nb_row;i++) { + if (markov.previous[i]) { + previous[i] = new int[nb_memory[i]]; + for (j = 0;j < nb_memory[i];j++) { + previous[i][j] = markov.previous[i][j]; + } + } + else { + previous[i] = NULL; + } + } + } + else { + previous = NULL; + } + + state_process = new CategoricalSequenceProcess(*(markov.state_process)); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destruction of the data members of a VariableOrderMarkovChain object. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::remove() + +{ + int i; + + + delete [] memo_type; + delete [] order; + + if (state) { + for (i = 0;i < nb_row;i++) { + delete [] state[i]; + } + delete [] state; + } + + delete [] parent; + + if (child) { + for (i = 0;i < nb_row;i++) { + delete [] child[i]; + } + delete [] child; + } + + if (next) { + for (i = 1;i < nb_row;i++) { + delete [] next[i]; + } + delete [] next; + } + + delete [] nb_memory; + + if (previous) { + for (i = 1;i < nb_row;i++) { + delete [] previous[i]; + } + delete [] previous; + } + + delete state_process; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the VariableOrderMarkovChain class. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain::~VariableOrderMarkovChain() + +{ + remove(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the VariableOrderMarkovChain class. + * + * \param[in] markov reference on a VariableOrderMarkovChain object. 
+ * + * \return VariableOrderMarkovChain object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain& VariableOrderMarkovChain::operator=(const VariableOrderMarkovChain &markov) + +{ + if (&markov != this) { + remove(); + Chain::remove(); + + Chain::copy(markov); + copy(markov); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Search for the parent memory. + * + * \param[in] index memory index. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::find_parent_memory(int index) + +{ + int i; + + + for (i = index - 1;i >= 0;i--) { + if (order[i] == order[index] - 1) { + parent[index] = i; + child[i][state[index][order[index] - 1]] = index; + break; + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the transitions between memories. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::build_memory_transition() + +{ + if (!next) { + int i , j , k; + int bnb_memory; + + +# ifdef DEBUG + cout << "\n"; +# endif + + next = new int*[nb_row]; + next[0] = NULL; + + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + next[i] = new int[nb_state]; + +# ifdef DEBUG + for (j = order[i] - 1;j >= 0;j--) { + cout << state[i][j] << " "; + } +# endif + + bnb_memory = 0; + for (j = 1;j < nb_row;j++) { + if (((child[i]) && (child[j]) && (order[j] == order[i] + 1)) || + ((!child[j]) && (order[j] <= order[i] + 1))) { + for (k = 0;k < order[j] - 1;k++) { + if (state[j][k + 1] != state[i][k]) { + break; + } + } + + if ((order[j] == 1) || (k == order[j] - 1)) { +// if ((memo_type[i] == NON_TERMINAL) || (transition[i][state[j][0]] > 0.)) { + next[i][state[j][0]] = j; +/* } + else { + next[i][state[j][0]] = I_DEFAULT; + } */ + +# ifdef DEBUG + cout << "| "; + for (k = order[j] - 1;k >= 0;k--) { 
+ cout << state[j][k] << " "; + } +# endif + + bnb_memory++; + if (bnb_memory == nb_state) { + break; + } + } + } + } + +# ifdef DEBUG + cout << endl; +# endif + + } + + else { + next[i] = NULL; + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the previous memories. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::build_previous_memory() + +{ + if ((next) && (!nb_memory) && (!previous)) { + int i , j; + int *buffer; + + + nb_memory = new int[nb_row]; + previous = new int*[nb_row]; + nb_memory[0] = 0; + previous[0] = NULL; + + buffer = new int[nb_row - 1]; + for (i = 1;i < nb_row;i++) { + nb_memory[i] = 0; + +// if (next[i]) { + if ((type == ORDINARY) || (!child[i])) { + for (j = 1;j < nb_row;j++) { + if ((next[j]) && (next[j][state[i][0]] == i)) { + buffer[nb_memory[i]] = j; + nb_memory[i]++; + } + } + } + + if (nb_memory[i] > 0) { + previous[i] = new int[nb_memory[i]]; + for (j = 0;j < nb_memory[i];j++) { + previous[i][j] = buffer[j]; + } + } + else { + previous[i] = NULL; + } + } + + delete [] buffer; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Checking that the set of terminal memories is suffix-free. + * + * \return suffix-free or not. 
+ */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkovChain::check_free_suffix() const + +{ + bool free_suffix = true; + int i , j , k; + + + for (i = 1;i < nb_row;i++) { + if ((!child[i]) && (order[i] >= 2)) { + for (j = 1;j < nb_row;j++) { + if ((!child[j]) && (order[j] < order[i])) { + for (k = 0;k < order[j];k++) { + if (state[i][k] != state[j][k]) { + break; + } + } + + if (k == order[j]) { + free_suffix = false; + break; + } + } + } + + if (j < nb_row) { + break; + } + } + } + + return free_suffix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the matrix of possible transitions between states + * (adjacency matrix of the graph of possible transitions). + * + * \return matrix of possible transitions. + */ +/*--------------------------------------------------------------*/ + +bool** VariableOrderMarkovChain::logic_transition_computation() const + +{ + bool **logic_transition; + int i , j; + double **order1_transition; + + + logic_transition = new bool*[nb_state]; + for (i = 0;i < nb_state;i++) { + logic_transition[i] = new bool[nb_state]; + logic_transition[i][i] = false; + } + + order1_transition = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + order1_transition[i] = new double[nb_state]; + for (j = 0;j < nb_state;j++) { + order1_transition[i][j] = 0.; + } + } + + for (i = 1;i < nb_row;i++) { + if (memo_type[i] == TERMINAL) { + for (j = 0;j < nb_state;j++) { + order1_transition[state[i][0]][j] += transition[i][j]; + } + } + } + + for (i = 0;i < nb_state;i++) { + for (j = 0;j < nb_state;j++) { + if (j != i) { + logic_transition[i][j] = (order1_transition[i][j] == 0. ? 
false : true); + } + } + } + + for (i = 0;i < nb_state;i++) { + delete [] order1_transition[i]; + } + delete [] order1_transition; + + return logic_transition; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the variable-order Markov chain classes + * (transient/recurrent/absorbing) from the state accessibility. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::component_computation() + +{ + bool **logic_transition; + int i; + + + logic_transition = logic_transition_computation(); + + Chain::component_computation(logic_transition); + + for (i = 0;i < nb_state;i++) { + delete [] logic_transition[i]; + } + delete [] logic_transition; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the non-terminal memories. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::build_non_terminal() + +{ + int i , j , k , m; + int nb_non_terminal , nb_terminal , bnb_memory; + + + nb_non_terminal = (nb_row - 1) / nb_state; + nb_terminal = (nb_row - 1) * (nb_state - 1) / nb_state + 1; +// nb_terminal = nb_non_terminal * (nb_state - 1) + 1; + + parent[0] = -1; + + i = 0; + for (j = nb_non_terminal;j < nb_non_terminal + nb_terminal;j++) { + for (k = order[j] - 1;k >= 0;k--) { + if (state[j][k] != 0) { + break; + } + } + bnb_memory = order[j] - 1 - k; + + // insertion of the non-terminal memories + + for (k = order[j] - bnb_memory;k < order[j];k++) { + memo_type[i] = NON_TERMINAL; + + for (m = 0;m < nb_state;m++) { + transition[i][m] = 0.; + } + + order[i] = k; + for (m = 0;m < order[i];m++) { + state[i][m] = state[j][m]; + } + + child[i] = new int[nb_state]; + i++; + } + + // copy of the terminal memory + + memo_type[i] = TERMINAL; + if (i < j) { + for (k = 0;k < nb_state;k++) { + transition[i][k] = transition[j][k]; + } + order[i] = order[j]; + for (k = 0;k < 
/*--------------------------------------------------------------*/
/**
 *  \brief Application of a threshold on the variable-order Markov chain parameters.
 *
 *  Probabilities not exceeding the threshold are raised to the threshold and
 *  the remaining ones are rescaled; the operation is repeated as long as the
 *  rescaling pushes a probability below the threshold. The threshold is capped
 *  at THRESHOLDING_FACTOR / nb_state. The transitions between memories
 *  (next, nb_memory, previous) are rebuilt at the end.
 *
 *  \param[in] min_probability minimum probability.
 */
/*--------------------------------------------------------------*/

void VariableOrderMarkovChain::thresholding(double min_probability)

{
  bool stop;
  int i , j;
  int nb_correction;
  double norm;


  // upper bound on the threshold

  if (min_probability > THRESHOLDING_FACTOR / (double)nb_state) {
    min_probability = THRESHOLDING_FACTOR / (double)nb_state;
  }

  // initial probabilities (ordinary process only)

  if (type == ORDINARY) {
    do {
      stop = true;
      nb_correction = 0;
      norm = 0.;

      // raise the small probabilities, accumulate the mass of the others
      for (i = 0;i < nb_state;i++) {
        if (initial[i] <= min_probability) {
          nb_correction++;
          initial[i] = min_probability;
        }
        else {
          norm += initial[i];
        }
      }

      // rescale the other probabilities to the mass left by the corrected ones
      if (nb_correction > 0) {
        for (i = 0;i < nb_state;i++) {
          if (initial[i] > min_probability) {
            initial[i] *= (1. - nb_correction * min_probability) / norm;
            if (initial[i] < min_probability) {
              stop = false;   // a rescaled value fell below the threshold: new pass
            }
          }
        }
      }
    }
    while (!stop);
  }

  // transition probabilities: terminal memories, and non-terminal memories of an ordinary process

  for (i = 1;i < nb_row;i++) {
    if ((memo_type[i] == TERMINAL) || ((type == ORDINARY) &&
         (memo_type[i] == NON_TERMINAL))) {
      do {
        stop = true;
        nb_correction = 0;
        norm = 0.;

        // only the transitions flagged in accessibility for state[i][0] are raised
        // NOTE(review): accessibility is dereferenced without a NULL check - confirm that
        // it is always computed before this function is called
        for (j = 0;j < nb_state;j++) {
          if ((accessibility[state[i][0]][j]) && (transition[i][j] <= min_probability)) {
            nb_correction++;
            transition[i][j] = min_probability;
          }
          else {
            norm += transition[i][j];
          }
        }

        if (nb_correction > 0) {
          for (j = 0;j < nb_state;j++) {
            if (transition[i][j] > min_probability) {
              transition[i][j] *= (1. - nb_correction * min_probability) / norm;
              if (transition[i][j] < min_probability) {
                stop = false;
              }
            }
          }
        }
      }
      while (!stop);
    }

    // a completion memory takes the transition distribution of its parent

    else if (memo_type[i] == COMPLETION) {
      for (j = 0;j < nb_state;j++) {
        transition[i][j] = transition[parent[i]][j];
      }
    }
  }

  // disabled: recomputation of the accessibility and of the classes

/*  if (accessibility) {
    for (i = 0;i < nb_state;i++) {
      delete [] accessibility[i];
    }
    delete [] accessibility;
  }
  accessibility = NULL;

  delete [] component_nb_state;

  if (component) {
    for (i = 0;i < nb_component;i++) {
      delete [] component[i];
    }
    delete [] component;
  }
  nb_component = 0;

  component_computation(); */

  // release and rebuild the transitions between memories

  if (next) {
    for (i = 1;i < nb_row;i++) {
      delete [] next[i];
    }
    delete [] next;
  }
  next = NULL;

  delete [] nb_memory;
  nb_memory = NULL;

  if (previous) {
    for (i = 1;i < nb_row;i++) {
      delete [] previous[i];
    }
    delete [] previous;
  }
  previous = NULL;

  build_memory_transition();
  build_previous_memory();
}
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::max_order_computation() + +{ + int i; + + + max_order = 0; + for (i = 0;i < nb_row;i++) { + if ((memo_type[i] == TERMINAL) && (order[i] > max_order)) { + max_order = order[i]; + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the number of parameters of a VariableOrderMarkovChain object. + * + * \param[in] min_probability minimum probability. + * + * \return number of parameters. + */ +/*--------------------------------------------------------------*/ + +int VariableOrderMarkovChain::nb_parameter_computation(double min_probability) const + +{ + int i , j; + int nb_parameter = 0; + + + // particular case order 0 + + if (max_order == 1) { + for (i = 0;i < nb_state - 1;i++) { + for (j = 2;j <= nb_state;j++) { + if (transition[j][i] != transition[1][i]) { + break; + } + } + + if (j <= nb_state) { + break; + } + } + + if (i == nb_state - 1) { + for (i = 0;i < nb_state;i++) { + if (transition[0][i] > min_probability) { + nb_parameter++; + } + } + + nb_parameter--; + } + } + + if (nb_parameter == 0) { + for (i = 1;i < nb_row;i++) { + if (memo_type[i] == TERMINAL) { +// if ((memo_type[i] == TERMINAL) || ((type == ORDINARY) && +// (memo_type[i] == NON_TERMINAL))) { + for (j = 0;j < nb_state;j++) { + if (transition[i][j] > min_probability) { + nb_parameter++; + } + } + + nb_parameter--; + } + } + } + + return nb_parameter; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the number of transient parameters corresponding to + * the transition distributions attached to the non-terminal memories of + * an ordinary variable-order Markov chain. + * + * \param[in] min_probability minimum probability. + * + * \return number of transient parameters. 
+ */ +/*--------------------------------------------------------------*/ + +int VariableOrderMarkovChain::nb_transient_parameter_computation(double min_probability) const + +{ + int i , j; + int nb_parameter = 0; + + + if (type == ORDINARY) { + for (i = 1;i < nb_row;i++) { + if (memo_type[i] == NON_TERMINAL) { + for (j = 0;j < nb_state;j++) { + if (transition[i][j] > min_probability) { + nb_parameter++; + } + } + + nb_parameter--; + } + } + } + + return nb_parameter; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Analysis of the format of a VariableOrderMarkovChain object. + * + * \param[in] error reference on a StatError object, + * \param[in] in_file stream, + * \param[in] line reference on the file line index, + * \param[in] type process type (ORDINARY/EQUILIBRIUM). + * + * \return VariableOrderMarkovChain object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovChain* VariableOrderMarkovChain::parsing(StatError &error , ifstream &in_file , + int &line , process_type type) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + streampos transition_line; + bool status = true , lstatus , increase , **logic_transition; + int i , j; + int read_line , tline , value , nb_state = 0 , order , previous_order , max_order = 0 , buff , + nb_terminal , nb_non_terminal , memory , state[ORDER] , previous_state[ORDER]; + double proba , cumul , *initial; + VariableOrderMarkovChain *markov; + + + markov = NULL; + + // analysis of the line defining the number of states + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + switch (i) { 
+ + // test number of states + + case 0 : { + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + if (lstatus) { + if ((value < 2) || (value > NB_STATE)) { + lstatus = false; + } + else { + nb_state = value; + } + } + + if (!lstatus) { + status = false; + error.update(STAT_parsing[STATP_NB_STATE] , line , i + 1); + } + break; + } + + // test STATES keyword + + case 1 : { + if (*token != STAT_word[STATW_STATES]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_STATES] , line , i + 1); + } + break; + } + } + + i++; + } + + if (i > 0) { + if (i != 2) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (nb_state == 0) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + if (status) { + initial = new double[nb_state]; + + // 1st pass: search for the number of terminal memories and the maximum order, + // analysis of the initial probabilities, transition probabilities and memories + + read_line = 0; + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + if ((read_line == 0) || ((type == ORDINARY) && (read_line == 2))) { + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test INITIAL_PROBABILITIES/TRANSITION_PROBABILITIES keyword + + if (i == 0) { + if ((type == ORDINARY) && (read_line == 0)) { + if (*token != STAT_word[STATW_INITIAL_PROBABILITIES]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_INITIAL_PROBABILITIES] , line); + } + } + + else { + if (*token != STAT_word[STATW_TRANSITION_PROBABILITIES]) { + status = false; + 
error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_TRANSITION_PROBABILITIES] , line); + } + } + } + + i++; + } + + if (i > 0) { + if (((type == ORDINARY) && (read_line == 2)) || ((type == EQUILIBRIUM) && (read_line == 0))) { + transition_line = in_file.tellg(); + tline = line; + } + + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + } + + else { + cumul = 0.; + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + if (i < nb_state) { + lstatus = true; + +/* try { + proba = stod(*token); in C++ 11 + } + catch (invalid_argument &arg) { + lstatus = false; + } */ + proba = atof(token->c_str()); + + if (lstatus) { + if ((proba < 0.) || (proba > 1. - cumul + DOUBLE_ERROR)) { + lstatus = false; + } + + else { + cumul += proba; + if ((type == ORDINARY) && (read_line == 1)) { + initial[i] = proba; + } + } + } + + if (!lstatus) { + status = false; + if ((type == ORDINARY) && (read_line == 1)) { + error.update(STAT_parsing[STATP_INITIAL_PROBABILITY] , line , i + 1); + } + else { + error.update(STAT_parsing[STATP_TRANSITION_PROBABILITY] , line , i + 1); + } + } + } + + else if ((type == EQUILIBRIUM) || (read_line >= 3)) { + lstatus = true; + +/* try { + value = stoi(*token); in C++ 11 + } + catch(invalid_argument &arg) { + lstatus = false; + } */ + value = atoi(token->c_str()); + + if (lstatus) { + if ((value < 0) || (value >= nb_state)) { + lstatus = false; + } + else if (i - nb_state < ORDER) { + state[i - nb_state] = value; + } + } + + if (!lstatus) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line , i + 1); + } + } + + i++; + } + + if (i > 0) { + if ((type == ORDINARY) && (read_line == 1) && (i != nb_state)) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + if (cumul < 1. 
- DOUBLE_ERROR) { + status = false; + error.update(STAT_parsing[STATP_PROBABILITY_SUM] , line); + } + + if (((type == ORDINARY) && (read_line >= 3)) || ((type == EQUILIBRIUM) && (read_line >= 1))) { + if (i <= nb_state) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + else { + order = i - nb_state; + + if (order > ORDER) { + status = false; + error.update(STAT_parsing[STATP_ORDER] , line); + } + + else if (order > max_order) { + max_order = order; + } + + if (status) { + for (j = 0;j < order / 2;j++) { + buff = state[j]; + state[j] = state[order - j - 1]; + state[order - j - 1] = buff; + } + + if (read_line - (type == ORDINARY ? 3 : 1) == 0) { + for (j = 0;j < order;j++) { + if (state[j] != 0) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line , nb_state + order - j); + } + } + } + + else { + + // checking of the memory succession + + increase = true; + for (j = MIN(previous_order , order) - 1;j >= 0;j--) { + if (increase) { + if (previous_state[j] < nb_state - 1) { + if (state[j] != previous_state[j] + 1) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line , nb_state + order - j); + } + increase = false; + } + + else { + if (state[j] != 0) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line , nb_state + order - j); + } + } + } + + else { + if (state[j] != previous_state[j]) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line , nb_state + order - j); + } + } + } + + // case increase of the memory length or stable memory length + + if (order >= previous_order) { + for (j = order - 1;j >= previous_order;j--) { + if (state[j] != 0) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line , nb_state + order - j); + } + } + } + + // case decrease of the memory length + + else { + for (j = order;j < previous_order;j++) { + if (previous_state[j] != nb_state - 1) { + status = false; + error.update(SEQ_parsing[SEQP_STATE] , line); + } + } + } + } + + // search for the last memory 
+ + for (j = 0;j < order;j++) { + if (state[j] != nb_state - 1) { + break; + } + } + + if (j == order) { + read_line++; + break; + } + + else { + for (j = 0;j < order;j++) { + previous_state[j] = state[j]; + } + previous_order = order; + } + } + } + } + } + } + + if (i > 0) { + read_line++; + } + } + + // checking of the number of memories + + nb_terminal = read_line - (type == ORDINARY ? 3 : 1); + if ((nb_state > 2) && (nb_terminal % (nb_state - 1) != 1)) { + status = false; + error.update(SEQ_parsing[SEQP_NB_MEMORY] , line); + } + + if (status) { + in_file.clear(); + in_file.seekg(transition_line); + + nb_non_terminal = (nb_terminal - 1) / (nb_state - 1); + markov = new VariableOrderMarkovChain(type , nb_state , nb_non_terminal + nb_terminal , max_order); + + if (type == ORDINARY) { + for (i = 0;i < nb_state;i++) { + markov->initial[i] = initial[i]; + } + } + + // 2nd pass: reading of the transition probabilities and the memories + + memory = nb_non_terminal; + line = tline; + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + if (i < nb_state) { +// markov->transition[memory][i] = stod(*token); in C++ 11 + markov->transition[memory][i] = atof(token->c_str()); + } + + else { +// state[i - nb_state] = stoi(*token); in C++ 11 + state[i - nb_state] = atoi(token->c_str()); + } + + i++; + } + + if (i > 0) { + markov->order[memory] = i - nb_state; + for (j = 0;j < markov->order[memory];j++) { + markov->state[memory][j] = state[markov->order[memory] - j - 1]; + } + + // search for the last memory + + for (j = 0;j < markov->order[memory];j++) { + if (markov->state[memory][j] != nb_state - 1) { + break; + } + } + + if (j == order) { + break; + } + else { + memory++; + 
} + } + } + + markov->build_non_terminal(); + + // test accessible states + + logic_transition = markov->logic_transition_computation(); + status = markov->strongly_connected_component_research(error , logic_transition); + + if (status) { + + // test irreducibility in the equilibrium process case + + markov->Chain::component_computation(logic_transition); + if ((type == EQUILIBRIUM) && (markov->nb_component > 1)) { + status = false; + error.correction_update(STAT_parsing[STATP_CHAIN_STRUCTURE] , STAT_parsing[STATP_IRREDUCIBLE]); + } + } + + for (i = 0;i < nb_state;i++) { + delete [] logic_transition[i]; + } + delete [] logic_transition; + + if (!status) { + delete markov; + markov = NULL; + } + else { + markov->max_order_computation(); + } + } + + delete [] initial; + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the memory out-tree. + * + * \param[in,out] os stream, + * \param[in] file_flag flag file. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkovChain::ascii_memory_tree_print(ostream &os , bool file_flag) const + +{ + int i , j , k; + int bnb_memory , width = column_width(nb_state); + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::left , ios::adjustfield); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_MEMORY_TREE] << endl; + + os << "\n"; + for (i = 0;i < nb_row;i++) { + if (!child[i]) { + if (file_flag) { + os << "# "; + } + + for (j = order[i] - 1;j >= 1;j--) { + if (state[i][j] != 0) { + break; + } + } + bnb_memory = order[i] - j; + + for (j = 0;j < order[i] - bnb_memory;j++) { + if (state[i][j] < nb_state - 1) { + os << "|"; + } + else { + os << " "; + } + + for (k = 0;k < (j + 1) * (width + 1) + 1;k++) { + os << " "; + } + } + + os << "|"; + for (j = order[i] - bnb_memory;j < order[i];j++) { + for (k = 0;k < 3;k++) { + os << "_"; + } + for (k = j;k >= 0;k--) { + os << 
setw(width) << state[i][k]; + if (k > 0) { + os << " "; + } + } + } + + os << endl; + } + } + + os.setf(format_flags , ios::adjustfield); + +# ifdef MESSAGE + os << "\n"; + for (i = 0;i < nb_row;i++) { + if (child[i]) { + if (file_flag) { + os << "# "; + } + + for (j = max_order - 1;j >= order[i];j--) { + os << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + + for (j = 0;j < nb_state;j++) { + os << " "; + for (k = max_order - 1;k >= order[child[i][j]];k--) { + os << " "; + } + for (k = order[child[i][j]] - 1;k >= 0;k--) { + os << state[child[i][j]][k] << " "; + } + } + + if (memo_type[i] == COMPLETION) { + os << " " << SEQ_label[SEQL_COMPLETION]; + } + + os << endl; + } + } +# endif + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the memory prefix in-tree. + * + * \param[in,out] os stream, + * \param[in] file_flag flag file. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkovChain::ascii_transition_tree_print(ostream &os , bool file_flag) const + +{ + int i , j , k; + int min_order , nb_root , memory , width = column_width(nb_state) , *nb_next_memory , + *root , *nb_leaf_memory , *nb_drawn_next_memory , *nb_drawn_leaf_memory; + ios_base::fmtflags format_flags; + + + // computation of the number of following memories of higher length + + nb_next_memory = new int[nb_row]; + root = new int[nb_row]; + nb_leaf_memory = new int[nb_row]; + nb_drawn_next_memory = new int[nb_row]; + nb_drawn_leaf_memory = new int[nb_row]; + + min_order = max_order; + + for (i = 1;i < nb_row;i++) { + nb_next_memory[i] = 0; + + if ((type == ORDINARY) || (!child[i])) { + for (j = 0;j < nb_state;j++) { + if (order[next[i][j]] == order[i] + 1) { + nb_next_memory[i]++; + } + } + + if (nb_next_memory[i] == 0) { + nb_leaf_memory[i] = 1; + } + else { + nb_leaf_memory[i] = 0; + } + + if (order[i] < min_order) { + min_order = order[i]; + } + } + } + 
+ // computation of the roots + + nb_root = 0; + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + for (j = 0;j < nb_memory[i];j++) { + if (order[previous[i][j]] == order[i] - 1) { + break; + } + } + + if (j == nb_memory[i]) { + root[nb_root++] = i; + } + } + } + + // computation of the number of leaf memories + + for (i = max_order;i >= min_order + 1;i--) { + for (j = 1;j < nb_row;j++) { + if ((order[j] == i) && (nb_leaf_memory[j] > 0) && + ((type == ORDINARY) || (!child[j]))) { + for (k = 0;k < nb_memory[j];k++) { + if ((order[previous[j][k]] == order[j] - 1) && + ((type == ORDINARY) || (!child[previous[j][k]]))) { + nb_leaf_memory[previous[j][k]] += nb_leaf_memory[j]; + break; + } + } + } + } + } + +# ifdef DEBUG + cout << "\n"; + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + for (j = max_order - 1;j >= order[i];j--) { + cout << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + cout << state[i][j] << " "; + } + + cout << " " << nb_next_memory[i] << " | " << nb_leaf_memory[i]; + for (j = 0;j < nb_root;j++) { + if (root[j] == i) { + cout << " root"; + break; + } + } + cout << endl; + } + } +# endif + + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + nb_drawn_next_memory[i] = nb_next_memory[i]; + nb_drawn_leaf_memory[i] = nb_leaf_memory[i]; + } + } + + format_flags = os.setf(ios::left , ios::adjustfield); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_TRANSITION_TREE] << endl; + + for (i = 0;i < nb_root;i++) { + os << "\n"; + for (j = 0;j < nb_leaf_memory[root[i]];j++) { + if (file_flag) { + os << "# "; + } + + memory = root[i]; + for (;;) { + if (!child[memory]) { + if (nb_drawn_leaf_memory[memory] == nb_leaf_memory[memory]) { + if (memory != root[i]) { + for (k = 0;k < 3;k++) { + os << "_"; + } + } + + for (k = order[memory] - 1;k >= 0;k--) { + os << setw(width) << state[memory][k]; + if (k > 0) { + os << " "; + } + } + } + + else { + if (memory != root[i]) { 
+ for (k = 0;k < 3;k++) { + os << " "; + } + } + + for (k = 0;k < order[memory] * (width + 1) - 2;k++) { + os << " "; + } + + if (nb_drawn_next_memory[memory] > 0) { + os << "|"; + } + else { + os << " "; + } + } + } + + else { + if (memory != root[i]) { + for (k = 0;k < 3;k++) { + os << " "; + } + } + + for (k = 0;k < order[memory] * (width + 1) - 1;k++) { + os << " "; + } + } + + nb_drawn_leaf_memory[memory]--; + + if (nb_next_memory[memory] == 0) { + os << endl; + break; + } + + for (k = 0;k < nb_state;k++) { + if ((order[next[memory][k]] == order[memory] + 1) && + (nb_drawn_leaf_memory[next[memory][k]] > 0)) { + if (nb_drawn_leaf_memory[next[memory][k]] == nb_leaf_memory[next[memory][k]]) { + nb_drawn_next_memory[memory]--; + } + memory = next[memory][k]; + break; + } + } + } + } + } + + os.setf(format_flags , ios::adjustfield); + + delete [] nb_next_memory; + delete [] root; + delete [] nb_leaf_memory; + delete [] nb_drawn_next_memory; + delete [] nb_drawn_leaf_memory; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the variable-order Markov chain parameters. + * + * \param[in,out] os stream, + * \param[in] file_flag flag file. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkovChain::ascii_print(ostream &os , bool file_flag) const + +{ + int i , j , k; + int buff , width; + double *stationary_probability; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::left , ios::adjustfield); + + os << "\n" << nb_state << " " << STAT_word[STATW_STATES] << endl; + + // computation of the column width + + width = column_width((type == ORDINARY ? 
nb_state : nb_row) , initial); + + for (i = 1;i < nb_row;i++) { + buff = column_width(nb_state , transition[i]); + if (buff > width) { + width = buff; + } + } + width += ASCII_SPACE; + + os << "\n"; + + switch (type) { + + case ORDINARY : { + os << STAT_word[STATW_INITIAL_PROBABILITIES] << endl; + for (i = 0;i < nb_state;i++) { + os << setw(width) << initial[i]; + } + os << endl; + break; + } + + case EQUILIBRIUM : { + + // computation of the stationary probabilities corresponding to the memories added by completion + + stationary_probability = new double[nb_row]; + + for (i = 1;i < nb_row;i++) { + stationary_probability[i] = initial[i]; + } + for (i = nb_row - 1;i >= 1;i--) { + if (memo_type[i] == COMPLETION) { + stationary_probability[parent[i]] += stationary_probability[i]; + } + } + + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_STATIONARY_PROBABILITIES] << endl; + + for (i = 1;i < nb_row;i++) { + if (memo_type[i] == TERMINAL) { + if (file_flag) { + os << "# "; + } + os << setw(width) << stationary_probability[i]; + + os << " "; + for (j = max_order - 1;j >= order[i];j--) { + os << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + os << endl; + } + } + + delete [] stationary_probability; + break; + } + } + + os << "\n" << STAT_word[STATW_TRANSITION_PROBABILITIES] << " "; + if (file_flag) { + os << "# "; + } + os << STAT_label[STATL_MEMORY] << endl; + + for (i = 1;i < nb_row;i++) { + if ((memo_type[i] != TERMINAL) && (file_flag)) { + os << "# "; + } + + for (j = 0;j < nb_state;j++) { + os << setw(width) << transition[i][j]; + } + + os << " "; + for (j = max_order - 1;j >= order[i];j--) { + os << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + + if ((memo_type[i] == TERMINAL) && (file_flag)) { + os << "# "; + } + + switch (memo_type[i]) { + + case NON_TERMINAL : { + os << " " << SEQ_label[SEQL_NON_TERMINAL]; + break; + } + + case TERMINAL : { + os << " " << SEQ_label[SEQL_TERMINAL]; + 
if (child[i]) { + os << " (" << SEQ_label[SEQL_COMPLETED] << ")"; + } + break; + } + + case COMPLETION : { + os << " " << SEQ_label[SEQL_COMPLETION] << " (" + << (child[i] ? SEQ_label[SEQL_NON_TERMINAL] : SEQ_label[SEQL_TERMINAL]) << ")"; + break; + } + } + + os << endl; + } + + if (nb_component > 0) { + for (i = 0;i < nb_component;i++) { + os << "\n"; + if (file_flag) { + os << "# "; + } + + switch (stype[component[i][0]]) { + case TRANSIENT : + os << STAT_label[STATL_TRANSIENT] << " "; + break; + default : + os << STAT_label[STATL_RECURRENT] << " "; + break; + } + os << STAT_label[STATL_CLASS] << ": " << STAT_label[component_nb_state[i] == 1 ? STATL_STATE : STATL_STATES]; + + for (j = 0;j < component_nb_state[i];j++) { + os << " " << component[i][j]; + } + + if (stype[component[i][0]] == ABSORBING) { + os << " (" << STAT_label[STATL_ABSORBING] << " " << STAT_label[STATL_STATE] << ")"; + } + } + os << endl; + } + + ascii_memory_tree_print(os , file_flag); + ascii_transition_tree_print(os , file_flag); + + os << "\n"; + if (file_flag) { + os << "# "; + } + os << SEQ_label[SEQL_MEMORY_TRANSITION_MATRIX] << endl; + + os << "\n"; + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + if (file_flag) { + os << "# "; + } + + for (j = max_order - 1;j >= order[i];j--) { + os << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + + for (j = 0;j < nb_state;j++) { + os << " "; + for (k = max_order - 1;k >= order[next[i][j]];k--) { + os << " "; + } + for (k = order[next[i][j]] - 1;k >= 0;k--) { + os << state[next[i][j]][k] << " "; + } + } + + switch (memo_type[i]) { + + case NON_TERMINAL : { + os << " " << SEQ_label[SEQL_NON_TERMINAL]; + break; + } + + case TERMINAL : { + os << " " << SEQ_label[SEQL_TERMINAL]; + if (child[i]) { + os << " (" << SEQ_label[SEQL_COMPLETED] << ")"; + } + break; + } + + case COMPLETION : { + os << " " << SEQ_label[SEQL_COMPLETION] << " (" + << (child[i] ? 
SEQ_label[SEQL_NON_TERMINAL] : SEQ_label[SEQL_TERMINAL]) << ")"; + break; + } + } + + os << endl; + } + } + +# ifdef DEBUG + if (previous) { + os << "\n"; + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + for (j = max_order - 1;j >= order[i];j--) { + os << " "; + } + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + + for (j = 0;j < nb_memory[i];j++) { + os << " "; + for (k = max_order - 1;k >= order[previous[i][j]];k--) { + os << " "; + } + for (k = order[previous[i][j]] - 1;k >= 0;k--) { + os << state[previous[i][j]][k] << " "; + } + } + os << endl; + } + } + } +# endif + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the variable-order Markov chain parameters at the spreadsheet format. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkovChain::spreadsheet_print(ostream &os) const + +{ + int i , j , k; + double *stationary_probability; + + + os << "\n" << nb_state << "\t" << STAT_word[STATW_STATES] << endl; + + switch (type) { + + case ORDINARY : { + os << "\n" << STAT_word[STATW_INITIAL_PROBABILITIES] << endl; + for (i = 0;i < nb_state;i++) { + os << initial[i] << "\t"; + } + os << endl; + break; + } + + case EQUILIBRIUM : { + + // computation of the stationary probabilities corresponding to the memories added by completion + + stationary_probability = new double[nb_row]; + + for (i = 1;i < nb_row;i++) { + stationary_probability[i] = initial[i]; + } + for (i = nb_row - 1;i >= 1;i--) { + if (memo_type[i] == COMPLETION) { + stationary_probability[parent[i]] += stationary_probability[i]; + } + } + + os << "\n" << STAT_label[STATL_STATIONARY_PROBABILITIES] << endl; + for (i = 1;i < nb_row;i++) { + if (memo_type[i] == TERMINAL) { + os << stationary_probability[i] << "\t\t"; + for (j = order[i] - 1;j >= 0;j--) { + os << 
state[i][j] << " "; + } + os << endl; + } + } + + delete [] stationary_probability; + break; + } + } + + os << "\n" << STAT_word[STATW_TRANSITION_PROBABILITIES] + << "\t\t" << STAT_label[STATL_MEMORY] << endl; + + for (i = 1;i < nb_row;i++) { + for (j = 0;j < nb_state;j++) { + os << transition[i][j] << "\t"; + } + + os << "\t"; + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + + os << "\t"; + switch (memo_type[i]) { + + case NON_TERMINAL : { + os << SEQ_label[SEQL_NON_TERMINAL]; + break; + } + + case TERMINAL : { + os << SEQ_label[SEQL_TERMINAL]; + if (child[i]) { + os << "\t" << SEQ_label[SEQL_COMPLETED]; + } + break; + } + + case COMPLETION : { + os << SEQ_label[SEQL_COMPLETION] << "\t" + << (child[i] ? SEQ_label[SEQL_NON_TERMINAL] : SEQ_label[SEQL_TERMINAL]); + break; + } + } + + os << endl; + } + + if (nb_component > 0) { + for (i = 0;i < nb_component;i++) { + switch (stype[component[i][0]]) { + case TRANSIENT : + os << "\n"<< STAT_label[STATL_TRANSIENT] << " "; + break; + default : + os << "\n" << STAT_label[STATL_RECURRENT] << " "; + break; + } + os << STAT_label[STATL_CLASS] << "\t" << STAT_label[component_nb_state[i] == 1 ? 
STATL_STATE : STATL_STATES]; + + for (j = 0;j < component_nb_state[i];j++) { + os << "\t" << component[i][j]; + } + + if (stype[component[i][0]] == ABSORBING) { + os << " (" << STAT_label[STATL_ABSORBING] << " " << STAT_label[STATL_STATE] << ")"; + } + } + os << endl; + } + + os << SEQ_label[SEQL_MEMORY_TRANSITION_MATRIX] << endl; + + os << "\n"; + for (i = 1;i < nb_row;i++) { + if ((type == ORDINARY) || (!child[i])) { + for (j = order[i] - 1;j >= 0;j--) { + os << state[i][j] << " "; + } + + os << "\t"; + for (j = 0;j < nb_state;j++) { + os << "\t"; + for (k = order[next[i][j]] - 1;k >= 0;k--) { + os << state[next[i][j]][k] << " "; + } + } + + os << "\t"; + switch (memo_type[i]) { + + case NON_TERMINAL : { + os << SEQ_label[SEQL_NON_TERMINAL]; + break; + } + + case TERMINAL : { + os << SEQ_label[SEQL_TERMINAL]; + if (child[i]) { + os << "\t" << SEQ_label[SEQL_COMPLETED]; + } + break; + } + + case COMPLETION : { + os << SEQ_label[SEQL_COMPLETION] << "\t" + << (child[i] ? SEQ_label[SEQL_NON_TERMINAL] : SEQ_label[SEQL_TERMINAL]); + break; + } + } + + os << endl; + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the VariableOrderMarkov class. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov::VariableOrderMarkov() + +{ + nb_iterator = 0; + markov_data = NULL; + + nb_output_process = 0; + categorical_process = NULL; + discrete_parametric_process = NULL; + continuous_parametric_process = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkov class. + * + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] inb_state number of states, + * \param[in] inb_row number of memories. 
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkov::VariableOrderMarkov(process_type itype , int inb_state , int inb_row)
+:VariableOrderMarkovChain(itype , inb_state , inb_row)
+
+{
+  // no observation process is attached to the chain
+
+  nb_output_process = 0;
+  categorical_process = NULL;
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+
+  // no data and no iterator
+
+  markov_data = NULL;
+  nb_iterator = 0;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the VariableOrderMarkov class.
+ *
+ * \param[in] itype process type (ORDINARY/EQUILIBRIUM),
+ * \param[in] inb_state number of states,
+ * \param[in] inb_row number of memories,
+ * \param[in] imax_order maximum order.
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkov::VariableOrderMarkov(process_type itype , int inb_state ,
+                                         int inb_row , int imax_order)
+:VariableOrderMarkovChain(itype , inb_state , inb_row , imax_order)
+
+{
+  // no observation process is attached to the chain
+
+  nb_output_process = 0;
+  categorical_process = NULL;
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+
+  // no data and no iterator
+
+  markov_data = NULL;
+  nb_iterator = 0;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Construction of a VariableOrderMarkov object of fixed order.
+ *
+ * \param[in] itype process type (ORDINARY/EQUILIBRIUM),
+ * \param[in] inb_state number of states,
+ * \param[in] iorder order,
+ * \param[in] init_flag flag initialization,
+ * \param[in] inb_output_process number of observation processes,
+ * \param[in] nb_value number of observed values for the categorical observation process.
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkov::VariableOrderMarkov(process_type itype , int inb_state ,
+                                         int iorder , bool init_flag ,
+                                         int inb_output_process , int nb_value)
+:VariableOrderMarkovChain(itype , inb_state , iorder , init_flag)
+
+{
+  nb_iterator = 0;
+  markov_data = NULL;
+
+  nb_output_process = inb_output_process;
+
+  // a categorical observation process is built only for a single output process
+
+  categorical_process = NULL;
+  if (nb_output_process == 1) {
+    categorical_process = new CategoricalSequenceProcess*[nb_output_process];
+    categorical_process[0] = new CategoricalSequenceProcess(nb_state , nb_value , true);
+  }
+
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Constructor of the VariableOrderMarkov class with completion of
+ * the memory tree.
+ *
+ * \param[in] markov reference on a VariableOrderMarkov object,
+ * \param[in] inb_output_process number of observation processes,
+ * \param[in] nb_value number of observed values for the categorical observation process.
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkov::VariableOrderMarkov(const VariableOrderMarkov &markov ,
+                                         int inb_output_process , int nb_value)
+
+{
+  memory_tree_completion(markov);
+
+  nb_iterator = 0;
+
+  // copy of the included data (second argument of the copy constructor set to false)
+
+  if (markov.markov_data) {
+    markov_data = new VariableOrderMarkovData(*(markov.markov_data) , false);
+  }
+  else {
+    markov_data = NULL;
+  }
+
+  nb_output_process = inb_output_process;
+
+  state_process = new CategoricalSequenceProcess(nb_state , nb_state , false);
+
+  if (nb_output_process == 1) {
+    categorical_process = new CategoricalSequenceProcess*[nb_output_process];
+
+    // the array has a single element: the observation process is stored at
+    // index 0, the slot released by remove() and used by the other constructors
+
+    categorical_process[0] = new CategoricalSequenceProcess(nb_state , nb_value , true);
+  }
+  else {
+    categorical_process = NULL;
+  }
+
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+}
+
+
+/*--------------------------------------------------------------*/
+/*
+ * \brief Constructor of the VariableOrderMarkov class with completion of
+ * the memory tree.
+ *
+ * \param[in] markov reference on a VariableOrderMarkov object,
+ * \param[in] inb_output_process number of observation processes,
+ * \param[in] nb_value number of observed values for each observation process.
+ */ +/*--------------------------------------------------------------*/ + +/* VariableOrderMarkov::VariableOrderMarkov(const VariableOrderMarkov &markov , + int inb_output_process , int *nb_value) + +{ + int i; + + + memory_tree_completion(markov); + + nb_iterator = 0; + + if (markov.markov_data) { + markov_data = new VariableOrderMarkovData(*(markov.markov_data) , false); + } + else { + markov_data = NULL; + } + + nb_output_process = inb_output_process; + + categorical_process = new CategoricalSequenceProcess*[nb_output_process]; + discrete_parametric_process = new DiscreteParametricProcess*[nb_output_process]; + continuous_parametric_process = new ContinuousParametricProcess*[nb_output_process]; + + for (i = 0;i < nb_output_process;i++) { + if (nb_value[i] == I_DEFAULT) { + categorical_process[i] = NULL; + discrete_parametric_process[i] = NULL; + continuous_parametric_process[i] = new ContinuousParametricProcess(nb_state); + } + + else if (nb_value[i] <= NB_OUTPUT) { + categorical_process[i] = new CategoricalSequenceProcess(nb_state , nb_value[i] , true); + discrete_parametric_process[i] = NULL; + continuous_parametric_process[i] = NULL; + } + + else { + categorical_process[i] = NULL; + discrete_parametric_process[i] = new DiscreteParametricProcess(nb_state , (int)(nb_value[i] * SAMPLE_NB_VALUE_COEFF)); + continuous_parametric_process[i] = NULL; + } + } +} */ + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkov class with completion of + * the memory tree. + * + * \param[in] pmarkov pointer on a VariableOrderMarkovChain object, + * \param[in] pobservation pointer on a CategoricalProcess object, + * \param[in] length sequence length. 
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkov::VariableOrderMarkov(const VariableOrderMarkovChain *pmarkov ,
+                                         const CategoricalProcess *pobservation , int length)
+
+{
+  // NOTE(review): the header comment mentions the completion of the memory tree
+  // but only build() is called here - confirm
+
+  build(*pmarkov);
+
+  markov_data = NULL;
+  nb_iterator = 0;
+
+  // a single observation process is built when an observation process is given
+
+  if (pobservation) {
+    nb_output_process = 1;
+    categorical_process = new CategoricalSequenceProcess*[nb_output_process];
+    categorical_process[0] = new CategoricalSequenceProcess(*pobservation);
+  }
+  else {
+    nb_output_process = 0;
+    categorical_process = NULL;
+  }
+
+  discrete_parametric_process = NULL;
+  continuous_parametric_process = NULL;
+
+  characteristic_computation(length , true);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Copy of a VariableOrderMarkov object.
+ *
+ * \param[in] markov reference on a VariableOrderMarkov object.
+ * \param[in] data_flag flag copy of the included VariableOrderMarkovData object.
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkov::copy(const VariableOrderMarkov &markov , bool data_flag)
+
+{
+  int i;
+
+
+  nb_iterator = 0;
+
+  // the included data are copied only on request and if they exist
+
+  markov_data = NULL;
+  if ((data_flag) && (markov.markov_data)) {
+    markov_data = new VariableOrderMarkovData(*(markov.markov_data) , false);
+  }
+
+  nb_output_process = markov.nb_output_process;
+
+  // categorical observation processes
+
+  categorical_process = NULL;
+  if (markov.categorical_process) {
+    categorical_process = new CategoricalSequenceProcess*[nb_output_process];
+
+    for (i = 0;i < nb_output_process;i++) {
+      categorical_process[i] = NULL;
+      if (markov.categorical_process[i]) {
+        categorical_process[i] = new CategoricalSequenceProcess(*(markov.categorical_process[i]));
+      }
+    }
+  }
+
+  // discrete parametric observation processes
+
+  discrete_parametric_process = NULL;
+  if (markov.discrete_parametric_process) {
+    discrete_parametric_process = new DiscreteParametricProcess*[nb_output_process];
+
+    for (i = 0;i < nb_output_process;i++) {
+      discrete_parametric_process[i] = NULL;
+      if (markov.discrete_parametric_process[i]) {
+        discrete_parametric_process[i] = new DiscreteParametricProcess(*(markov.discrete_parametric_process[i]));
+      }
+    }
+  }
+
+  // continuous parametric observation processes
+
+  continuous_parametric_process = NULL;
+  if (markov.continuous_parametric_process) {
+    continuous_parametric_process = new ContinuousParametricProcess*[nb_output_process];
+
+    for (i = 0;i < nb_output_process;i++) {
+      continuous_parametric_process[i] = NULL;
+      if (markov.continuous_parametric_process[i]) {
+        continuous_parametric_process[i] = new ContinuousParametricProcess(*(markov.continuous_parametric_process[i]));
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destruction of the data members of a VariableOrderMarkov object.
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkov::remove()
+
+{
+  int i;
+
+
+  delete markov_data;
+
+  // each array of observation processes is released element by element, then as a whole
+
+  if (categorical_process != NULL) {
+    i = 0;
+    while (i < nb_output_process) {
+      delete categorical_process[i];
+      i++;
+    }
+    delete [] categorical_process;
+  }
+
+  if (discrete_parametric_process != NULL) {
+    i = 0;
+    while (i < nb_output_process) {
+      delete discrete_parametric_process[i];
+      i++;
+    }
+    delete [] discrete_parametric_process;
+  }
+
+  if (continuous_parametric_process != NULL) {
+    i = 0;
+    while (i < nb_output_process) {
+      delete continuous_parametric_process[i];
+      i++;
+    }
+    delete [] continuous_parametric_process;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destructor of the VariableOrderMarkov class.
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkov::~VariableOrderMarkov()
+
+{
+  remove();
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ * \brief Destruction of a VariableOrderMarkov object taking account of
+ * the number of iterators pointing to it.
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::conditional_delete() + +{ + if (nb_iterator == 0) { + delete this; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the VariableOrderMarkov class. + * + * \param[in] markov reference on a VariableOrderMarkov object. + * + * \return VariableOrderMarkov object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov& VariableOrderMarkov::operator=(const VariableOrderMarkov &markov) + +{ + if ((&markov != this) && (nb_iterator == 0)) { + remove(); + VariableOrderMarkovChain::remove(); + Chain::remove(); + + Chain::copy(markov); + VariableOrderMarkovChain::copy(markov); + copy(markov); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a distribution. + * + * \param[in] error reference on a StatError object, + * \param[in] dist_type distribution type, + * \param[in] variable variable index, + * \param[in] value state or observation. + * + * \return DiscreteParametricModel object. 
+ */ +/*--------------------------------------------------------------*/ + +DiscreteParametricModel* VariableOrderMarkov::extract(StatError &error , process_distribution dist_type , + int variable , int value) const + +{ + bool status = true; + int hvariable; + Distribution *pdist; + DiscreteParametric *pparam; + DiscreteParametricModel *dist; + FrequencyDistribution *phisto; + CategoricalSequenceProcess *process; + + + dist = NULL; + error.init(); + + pdist = NULL; + pparam = NULL; + + if (dist_type == OBSERVATION) { + if ((variable < 1) || (variable > nb_output_process)) { + status = false; + error.update(STAT_error[STATR_OUTPUT_PROCESS_INDEX]); + } + + else { + if ((value < 0) || (value >= nb_state)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << value << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + if (categorical_process[variable - 1]) { + pdist = categorical_process[variable - 1]->observation[value]; + } + else if (discrete_parametric_process[variable - 1]) { + pparam = discrete_parametric_process[variable - 1]->observation[value]; + } + else { + status = false; + ostringstream correction_message; + correction_message << STAT_label[STATL_CATEGORICAL] << " or " + << STAT_label[STATL_DISCRETE_PARAMETRIC]; + error.correction_update(STAT_error[STATR_OUTPUT_PROCESS_TYPE] , (correction_message.str()).c_str()); + } + } + } + } + + else { + if ((variable < 0) || (variable > nb_output_process)) { + status = false; + error.update(STAT_error[STATR_OUTPUT_PROCESS_INDEX]); + } + + else { + if (variable == 0) { + process = state_process; + } + + else { + process = categorical_process[variable - 1]; + + if (!process) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable << ": " + << SEQ_error[SEQR_CHARACTERISTICS_NOT_COMPUTED]; + error.update((error_message.str()).c_str()); + } + } + + if ((process) 
&& ((value < 0) || (value >= process->nb_value))) { + status = false; + ostringstream error_message; + error_message << STAT_label[variable == 0 ? STATL_STATE : STATL_OUTPUT] << " " + << value << " " << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + if (status) { + switch (dist_type) { + case FIRST_OCCURRENCE : + pdist = process->first_occurrence[value]; + break; + case RECURRENCE_TIME : + pdist = process->recurrence_time[value]; + break; + case SOJOURN_TIME : + pparam = process->sojourn_time[value]; + break; + case NB_RUN : + pdist = process->nb_run[value]; + break; + case NB_OCCURRENCE : + pdist = process->nb_occurrence[value]; + break; + } + + if ((!pdist) && (!pparam)) { + status = false; + error.update(SEQ_error[SEQR_NON_EXISTING_CHARACTERISTIC_DISTRIBUTION]); + } + } + } + } + + if (status) { + phisto = NULL; + + if (markov_data) { + switch (markov_data->type[0]) { + case STATE : + hvariable = variable; + break; + case INT_VALUE : + hvariable = variable - 1; + break; + } + + if (hvariable >= 0) { + switch (dist_type) { + + case OBSERVATION : { + if ((markov_data->observation_distribution) && + (markov_data->observation_distribution[hvariable])) { + phisto = markov_data->observation_distribution[hvariable][value]; + } + break; + } + + case FIRST_OCCURRENCE : { + phisto = markov_data->characteristics[hvariable]->first_occurrence[value]; + break; + } + + case RECURRENCE_TIME : { + if (markov_data->characteristics[hvariable]->recurrence_time[value]->nb_element > 0) { + phisto = markov_data->characteristics[hvariable]->recurrence_time[value]; + } + break; + } + + case SOJOURN_TIME : { + if (markov_data->characteristics[hvariable]->sojourn_time[value]->nb_element > 0) { + phisto = markov_data->characteristics[hvariable]->sojourn_time[value]; + } + break; + } + + case NB_RUN : { + phisto = markov_data->characteristics[hvariable]->nb_run[value]; + break; + } + + case NB_OCCURRENCE : { + phisto = 
markov_data->characteristics[hvariable]->nb_occurrence[value]; + break; + } + } + } + } + + if (pdist) { + dist = new DiscreteParametricModel(*pdist , phisto); + } + else if (pparam) { + dist = new DiscreteParametricModel(*pparam , phisto); + } + } + + return dist; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of the VariableOrderMarkovData object included in + * a VariableOrderMarkov object. + * + * \param[in] error reference on a StatError object. + * + * \return VariableOrderMarkovData object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* VariableOrderMarkov::extract_data(StatError &error) const + +{ + bool status = true; + VariableOrderMarkovData *seq; + + + seq = NULL; + error.init(); + + if (!markov_data) { + status = false; + error.update(STAT_error[STATR_NO_DATA]); + } + else if (nb_output_process + 1 != markov_data->nb_variable) { + status = false; + error.update(SEQ_error[SEQR_STATE_SEQUENCES]); + } + + if (status) { + seq = new VariableOrderMarkovData(*markov_data); + seq->markov = new VariableOrderMarkov(*this , false); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Application of a threshold on the probability parameters of a variable-order Markov chain. + * + * \param[in] min_probability minimum probability. + * + * \return VariableOrderMarkov object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov* VariableOrderMarkov::thresholding(double min_probability) const + +{ + int i; + VariableOrderMarkov *markov; + + + markov = new VariableOrderMarkov(*this , false); + markov->VariableOrderMarkovChain::thresholding(min_probability); + + for (i = 0;i < markov->nb_output_process;i++) { + if (markov->categorical_process[i]) { + markov->categorical_process[i]->thresholding(min_probability); + } + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a VariableOrderMarkov object from a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] length sequence length. + * + * \return VariableOrderMarkov object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov* VariableOrderMarkov::ascii_read(StatError &error , + const string path , int length) + +{ + string buffer; + size_t position; + typedef tokenizer> tokenizer; + char_separator separator(" \t"); + process_type type = DEFAULT_TYPE; + bool status; + int i; + int line; + const VariableOrderMarkovChain *imarkov; + const CategoricalProcess *observation; + VariableOrderMarkov *markov; + ifstream in_file(path.c_str()); + + + markov = NULL; + error.init(); + + if (!in_file) { + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + line = 0; + + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = 
tok_buffer.begin();token != tok_buffer.end();token++) { + + // test (EQUILIBRIUM_)MARKOV_CHAIN keyword + + if (i == 0) { + if (*token == SEQ_word[SEQW_MARKOV_CHAIN]) { + type = ORDINARY; + } + else if (*token == SEQ_word[SEQW_EQUILIBRIUM_MARKOV_CHAIN]) { + type = EQUILIBRIUM; + } + else { + status = false; + ostringstream correction_message; + correction_message << SEQ_word[SEQW_MARKOV_CHAIN] << " or " + << SEQ_word[SEQW_EQUILIBRIUM_MARKOV_CHAIN]; + error.correction_update(STAT_parsing[STATP_KEYWORD] , + (correction_message.str()).c_str() , line); + } + } + + i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + break; + } + } + + if (type != DEFAULT_TYPE) { + + // analysis of the format and reading of the variable-order Markov chain + + imarkov = VariableOrderMarkovChain::parsing(error , in_file , line , type); + + if (imarkov) { + + // analysis of the format and reading of the categorical observation distributions + + observation = NULL; + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line << " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + i = 0; + + tokenizer tok_buffer(buffer , separator); + + for (tokenizer::iterator token = tok_buffer.begin();token != tok_buffer.end();token++) { + + // test OUTPUT_PROCESS keyword + + if (i == 0) { + if (*token != STAT_word[STATW_OUTPUT_PROCESS]) { + status = false; + error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_OUTPUT_PROCESS] , line); + } + } + + i++; + } + + if (i > 0) { + if (i != 1) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + + observation = CategoricalProcess::parsing(error , in_file , line , + ((Chain*)imarkov)->nb_state , + HIDDEN_MARKOV , false); + if (!observation) { + status = false; + } + + break; + } + } + + while (getline(in_file , buffer)) { + line++; + +# ifdef DEBUG + cout << line 
<< " " << buffer << endl; +# endif + + position = buffer.find('#'); + if (position != string::npos) { + buffer.erase(position); + } + if (!(trim_right_copy_if(buffer , is_any_of(" \t")).empty())) { + status = false; + error.update(STAT_parsing[STATP_FORMAT] , line); + } + } + + if (status) { + markov = new VariableOrderMarkov(imarkov , observation , length); + +# ifdef DEBUG + imarkov->ascii_memory_tree_print(cout); + markov->ascii_write(cout); +# endif + + } + + delete imarkov; + delete observation; + } + } + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing on a single line of a VariableOrderMarkov object. + * + * \param[in,out] os stream. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkov::line_write(ostream &os) const + +{ + os << nb_state << " " << STAT_word[STATW_STATES] << " " + << SEQ_label[SEQL_MAX_ORDER] << " " << max_order; + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkov object and the associated data structure. + * + * \param[in,out] os stream, + * \param[in] seq pointer on a VariableOrderMarkovData object, + * \param[in] exhaustive flag detail level, + * \param[in] file_flag flag file, + * \param[in] hidden flag hidden model. 
 */
/*--------------------------------------------------------------*/

// Overview of the output, in order: model keyword, chain parameters, transition counts or
// confidence intervals (if data), state process, output processes (with distances between
// observation distributions for hidden models), then sample characteristics and
// penalized likelihoods (if data). With file_flag, the commentary lines are prefixed by "# ".

ostream& VariableOrderMarkov::ascii_write(ostream &os , const VariableOrderMarkovData *seq ,
                                          bool exhaustive , bool file_flag , bool hidden) const

{
  bool **logic_transition;
  int i , j , k;
  int buff , variable , max_memory_count , *memory_count , width[2];
  double standard_normal_value , half_confidence_interval , **distance;
  FrequencyDistribution *marginal_dist = NULL , **observation_dist = NULL;
  Histogram *marginal_histo = NULL , **observation_histo = NULL;
  SequenceCharacteristics *characteristics = NULL;
  ios_base::fmtflags format_flags;


  // left adjustment of the fields; the previous flags are restored before returning
  format_flags = os.setf(ios::left , ios::adjustfield);

  // writing of the model keyword

  if (hidden) {
    switch (type) {
    case ORDINARY :
      os << SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN] << endl;
      break;
    case EQUILIBRIUM :
      os << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN] << endl;
      break;
    }
  }

  else {
    switch (type) {
    case ORDINARY :
      os << SEQ_word[SEQW_MARKOV_CHAIN] << endl;
      break;
    case EQUILIBRIUM :
      os << SEQ_word[SEQW_EQUILIBRIUM_MARKOV_CHAIN] << endl;
      break;
    }
  }

  // writing of the variable-order Markov chain parameters

  ascii_print(os , file_flag);

  // writing of the transition counts (hidden model) or of the confidence intervals
  // on the transition probabilities (observed model), with the count of each memory

//  if ((nb_component == 1) && (seq) && ((!hidden) || (seq->type[0] == STATE))) {
  if ((seq) && ((!hidden) || (seq->type[0] == STATE))) {

    // quantile of upper-tail probability 0.025, i.e. two-sided 95% interval
    // (presumably the standard normal distribution, as the variable name indicates)
    normal dist;
    standard_normal_value = quantile(complement(dist , 0.025));

    if (!hidden) {

      // column width computed on the transition probabilities of the selected memories
      width[0] = 0;
      for (i = 1;i < nb_row;i++) {
        if ((memo_type[i] == TERMINAL) || ((type == ORDINARY) &&
             (memo_type[i] == NON_TERMINAL))) {
          buff = column_width(nb_state , transition[i]);
          if (buff > width[0]) {
            width[0] = buff;
          }
        }
      }
      width[0]++;
    }

    // count of each memory: sum of the transition counts of the corresponding row;
    // memory_count[i] is only set for the rows selected below

    memory_count = new int[nb_row];
    max_memory_count = 0;
    for (i = 1;i < nb_row;i++) {
//      if (memo_type[i] == TERMINAL) {
      if ((memo_type[i] == TERMINAL) || ((type == ORDINARY) &&
           (memo_type[i] == NON_TERMINAL)) || (hidden)) {
        memory_count[i] = 0;
        for (j = 0;j < nb_state;j++) {
          memory_count[i] += seq->chain_data->transition[i][j];
        }

        if (memory_count[i] > max_memory_count) {
          max_memory_count = memory_count[i];
        }
      }
    }
    width[1] = column_width(max_memory_count) + ASCII_SPACE;

    os << "\n";
    if (file_flag) {
      os << "# ";
    }
    if (hidden) {
      os << SEQ_label[SEQL_TRANSITION_COUNTS] << endl;
    }
    else {
      os << SEQ_label[SEQL_TRANSITION_PROBABILITIY_CONFIDENCE_INTERVAL] << endl;
    }

    os << "\n";
    for (i = 1;i < nb_row;i++) {

      // same row selection as for the computation of memory_count

//      if (memo_type[i] == TERMINAL) {
      if ((memo_type[i] == TERMINAL) || ((type == ORDINARY) &&
           (memo_type[i] == NON_TERMINAL)) || (hidden)) {
        if (memory_count[i] > 0.) {
          if (file_flag) {
            os << "# ";
          }

          if (hidden) {
            for (j = 0;j < nb_state;j++) {
              os << setw(width[1]) << seq->chain_data->transition[i][j];
            }
            os << " ";
          }

          else {
            if (memo_type[i] == TERMINAL) {
              for (j = 0;j < nb_state;j++) {

                // interval p +/- z * sqrt(p (1 - p) / n) truncated to [0, 1],
                // written only for probabilities strictly between 0 and 1

                if ((transition[i][j] > 0.) && (transition[i][j] < 1.)) {
                  half_confidence_interval = standard_normal_value *
                                             sqrt(transition[i][j] * (1. - transition[i][j]) / memory_count[i]);
                  os << setw(width[0]) << MAX(transition[i][j] - half_confidence_interval , 0.)
                     << setw(width[0]) << MIN(transition[i][j] + half_confidence_interval , 1.)
                     << "| ";
                }
                else {
                  os << setw(width[0]) << " "
                     << setw(width[0]) << " "
                     << "| ";
                }
              }
            }

            // non-terminal memory: empty columns, only the count and the memory are written

            else {
              for (j = 0;j < nb_state;j++) {
                os << setw(width[0]) << " "
                   << setw(width[0]) << " "
                   << "| ";
              }
            }
          }

          os << setw(width[1]) << memory_count[i] << " ";

          // padding up to max_order, then writing of the states of the memory
          // from index order[i] - 1 down to index 0

          for (j = max_order - 1;j >= order[i];j--) {
            os << " ";
          }
          for (j = order[i] - 1;j >= 0;j--) {
            os << state[i][j] << " ";
          }

          os << endl;
        }
      }
    }

    delete [] memory_count;
  }

  // writing of the state process; the characteristics of the data are used
  // only if the first variable of the data is of type STATE

  if ((seq) && (seq->type[0] == STATE)) {
    characteristics = seq->characteristics[0];
  }
  else {
    characteristics = NULL;
  }

  state_process->ascii_print(os , 0 , NULL , NULL , characteristics ,
                             exhaustive , file_flag);

  if (hidden) {

    // writing of the state probabilities (theoretical and restoration weights) taken from
    // the first discrete or continuous parametric output process found (break after it)

    for (i = 0;i < nb_output_process;i++) {
      if (discrete_parametric_process[i]) {
        if (discrete_parametric_process[i]->weight) {
          width[0] = column_width(nb_state , discrete_parametric_process[i]->weight->mass);
        }
        else {
          width[0] = 0;
        }
        if (discrete_parametric_process[i]->restoration_weight) {
          buff = column_width(nb_state , discrete_parametric_process[i]->restoration_weight->mass);
          if (buff > width[0]) {
            width[0] = buff;
          }
        }
        width[0]++;

        if (discrete_parametric_process[i]->weight) {
          os << "\n";
          if (file_flag) {
            os << "# ";
          }
          os << STAT_label[STATL_THEORETICAL] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": ";

          for (j = 0;j < nb_state;j++) {
            os << setw(width[0]) << discrete_parametric_process[i]->weight->mass[j];
          }
          os << endl;
        }

        if (discrete_parametric_process[i]->restoration_weight) {
          if (file_flag) {
            os << "# ";
          }
          os << STAT_label[STATL_RESTORATION] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": ";

          for (j = 0;j < nb_state;j++) {
            os << setw(width[0]) << discrete_parametric_process[i]->restoration_weight->mass[j];
          }
          os << endl;
        }
        break;
      }

      else if (continuous_parametric_process[i]) {
        if (continuous_parametric_process[i]->weight) {
          width[0] = column_width(nb_state , continuous_parametric_process[i]->weight->mass);
        }
        else {
          width[0] = 0;
        }
        if (continuous_parametric_process[i]->restoration_weight) {
          buff = column_width(nb_state , continuous_parametric_process[i]->restoration_weight->mass);
          if (buff > width[0]) {
            width[0] = buff;
          }
        }
        width[0]++;

        if (continuous_parametric_process[i]->weight) {
          os << "\n";
          if (file_flag) {
            os << "# ";
          }
          os << STAT_label[STATL_THEORETICAL] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": ";

          for (j = 0;j < nb_state;j++) {
            os << setw(width[0]) << continuous_parametric_process[i]->weight->mass[j];
          }
          os << endl;
        }

        if (continuous_parametric_process[i]->restoration_weight) {
          if (file_flag) {
            os << "# ";
          }
          os << STAT_label[STATL_RESTORATION] << " " << SEQ_label[SEQL_STATE_PROBABILITY] << ": ";

          for (j = 0;j < nb_state;j++) {
            os << setw(width[0]) << continuous_parametric_process[i]->restoration_weight->mass[j];
          }
          os << endl;
        }
        break;
      }
    }

    os << "\n" << nb_output_process << " "
       << STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] << endl;
  }

  // writing of the distributions associated with each observation process

  // logic_transition and distance are allocated, used and released only for hidden models

  if (hidden) {
    logic_transition = logic_transition_computation();

    distance = new double*[nb_state];
    for (i = 0;i < nb_state;i++) {
      distance[i] = new double[nb_state];
    }
  }

  for (i = 0;i < nb_output_process;i++) {
    os << "\n" << STAT_word[STATW_OUTPUT_PROCESS];

    if (hidden) {
      os << " " << i + 1;

      if (categorical_process[i]) {
        os << " : " << STAT_word[STATW_CATEGORICAL];
      }
      else if (discrete_parametric_process[i]) {
        os << " : " << STAT_word[STATW_DISCRETE_PARAMETRIC];
      }
      else {
        os << " : " << STAT_word[STATW_CONTINUOUS_PARAMETRIC];
      }
    }
    os << endl;

    if (seq) {

      // index of the data variable: shifted by 1 if the first variable is the state variable

      switch (seq->type[0]) {
      case STATE :
        variable = i + 1;
        break;
      default :
        variable = i;
        break;
      }

      if (seq->observation_distribution) {
        observation_dist = seq->observation_distribution[variable];
      }
      marginal_dist = seq->marginal_distribution[variable];

      if (seq->observation_histogram) {
        observation_histo = seq->observation_histogram[variable];
      }
      marginal_histo = seq->marginal_histogram[variable];

      characteristics = seq->characteristics[variable];
    }

    // for hidden models, the symmetric matrix of distances between the observation distributions
    // of the states is filled; the distance is set to 1 for pairs of states not linked
    // by a possible transition (in either direction)

    if (categorical_process[i]) {
      categorical_process[i]->ascii_print(os , i + 1 , observation_dist , marginal_dist ,
                                          characteristics , exhaustive , file_flag);

      if (hidden) {
        for (j = 0;j < nb_state;j++) {
          distance[j][j] = 0.;

          for (k = j + 1;k < nb_state;k++) {
            if ((logic_transition[j][k]) || (logic_transition[k][j])) {
              distance[j][k] = categorical_process[i]->observation[j]->overlap_distance_computation(*(categorical_process[i]->observation[k]));
            }
            else {
              distance[j][k] = 1.;
            }

            distance[k][j] = distance[j][k];
          }
        }
      }
    }

    else if (discrete_parametric_process[i]) {
      discrete_parametric_process[i]->ascii_print(os , observation_dist , marginal_dist ,
                                                  exhaustive , file_flag);

      if (hidden) {
        for (j = 0;j < nb_state;j++) {
          distance[j][j] = 0.;

          for (k = j + 1;k < nb_state;k++) {
            if ((logic_transition[j][k]) || (logic_transition[k][j])) {
              distance[j][k] = discrete_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(discrete_parametric_process[i]->observation[k]));
            }
            else {
              distance[j][k] = 1.;
            }

            distance[k][j] = distance[j][k];
          }
        }
      }
    }

    // neither categorical nor discrete parametric: the process is taken to be continuous parametric

    else {
      continuous_parametric_process[i]->ascii_print(os , observation_histo , observation_dist ,
                                                    marginal_histo , marginal_dist ,
                                                    exhaustive , file_flag);

      if (hidden) {
        for (j = 0;j < nb_state;j++) {
          distance[j][j] = 0.;

          for (k = j + 1;k < nb_state;k++) {
            if ((logic_transition[j][k]) || (logic_transition[k][j])) {
              distance[j][k] = continuous_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(continuous_parametric_process[i]->observation[k]));
            }
            else {
              distance[j][k] = 1.;
            }

            distance[k][j] = distance[j][k];
          }
        }
      }
    }

    if (hidden) {

      // writing of the distance matrix: a distance is written only for a possible transition
      // between two different states, "_" otherwise

      width[0] = column_width(nb_state , distance[0]);
      for (j = 1;j < nb_state;j++) {
        buff = column_width(nb_state , distance[j]);
        if (buff > width[0]) {
          width[0] = buff;
        }
      }
      width[0] += ASCII_SPACE;

      os.setf(ios::left , ios::adjustfield);

      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << STAT_label[STATL_CONSECUTIVE_STATE_OBSERVATION_DISTRIBUTION_DISTANCE] << endl;

      for (j = 0;j < nb_state;j++) {
        if (file_flag) {
          os << "# ";
        }
        for (k = 0;k < nb_state;k++) {
          if ((k != j) && (logic_transition[j][k])) {
            os << setw(width[0]) << distance[j][k];
          }
          else {
            os << setw(width[0]) << "_";
          }
        }
        os << endl;
      }
    }
  }

  if (hidden) {
    for (i = 0;i < nb_state;i++) {
      delete [] logic_transition[i];
    }
    delete [] logic_transition;

    for (i = 0;i < nb_state;i++) {
      delete [] distance[i];
    }
    delete [] distance;
  }

  if (seq) {

    // nb_transient_parameter is assigned and read only if type == ORDINARY

    int nb_parameter = nb_parameter_computation(hidden ? MIN_PROBABILITY : 0.) , nb_transient_parameter;
    double information;


    // writing of the sequence length frequency distribution

    os << "\n";
    if (file_flag) {
      os << "# ";
    }
    os << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " - ";
    seq->length_distribution->ascii_characteristic_print(os , false , file_flag);

    if (exhaustive) {
      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << " | " << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl;
      seq->length_distribution->ascii_print(os , file_flag);
    }

    os << "\n";
    if (file_flag) {
      os << "# ";
    }
    os << SEQ_label[SEQL_CUMUL_LENGTH] << ": " << seq->cumul_length << endl;

    // writing of the information quantity of the observed sequences in the i.i.d. case

    // search of a real-valued variable: the information quantity is written only if none is found

    for (i = 0;i < seq->nb_variable;i++) {
      if (seq->type[i] == REAL_VALUE) {
        break;
      }
    }

    if (i == seq->nb_variable) {
      information = seq->iid_information_computation();

      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << SEQ_label[SEQL_IID_INFORMATION] << ": " << information << " ("
         << information / seq->cumul_length << ")" << endl;
    }

    // writing of the (penalized) log-likelihoods of the model for sequences

    if (hidden) {
      if (seq->restoration_likelihood != D_INF) {
        os << "\n";
        if (file_flag) {
          os << "# ";
        }
        os << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << ": " << seq->restoration_likelihood << " ("
           << STAT_label[STATL_NORMALIZED] << ": " << seq->restoration_likelihood / seq->cumul_length << ")" << endl;
      }

      if (seq->sample_entropy != D_DEFAULT) {
        os << "\n";
        if (file_flag) {
          os << "# ";
        }
        os << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << ": " << seq->sample_entropy << " ("
           << STAT_label[STATL_NORMALIZED] << ": " << seq->sample_entropy / seq->cumul_length << ")" << endl;
      }

      if (seq->likelihood != D_INF) {
        os << "\n";
        if (file_flag) {
          os << "# ";
        }
        os << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << ": " << seq->likelihood << " ("
           << STAT_label[STATL_NORMALIZED] << ": " << seq->likelihood / seq->cumul_length << ")" << endl;
      }
    }

    else {
      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << STAT_label[STATL_LIKELIHOOD] << ": " << seq->likelihood << " ("
         << STAT_label[STATL_NORMALIZED] << ": " << seq->likelihood / seq->cumul_length << ")" << endl;
    }

    // penalized likelihoods; for an ordinary model with transient parameters, each criterion is
    // also written with the transient parameters added to the number of free parameters

    if (seq->likelihood != D_INF) {
      if (type == ORDINARY) {
        nb_transient_parameter = nb_transient_parameter_computation(hidden ? MIN_PROBABILITY : 0.);

        os << "\n";
        if (file_flag) {
          os << "# ";
        }
        os << nb_transient_parameter << " "
           << SEQ_label[nb_transient_parameter == 1 ? SEQL_FREE_TRANSIENT_PARAMETER : SEQL_FREE_TRANSIENT_PARAMETERS] << endl;
      }

      // AIC

      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS]
         << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << "): "
         << 2 * (seq->likelihood - nb_parameter) << endl;

      if ((type == ORDINARY) && (nb_transient_parameter > 0)) {
        if (file_flag) {
          os << "# ";
        }
        os << nb_transient_parameter + nb_parameter << " " << STAT_label[STATL_FREE_PARAMETERS]
           << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << "): "
           << 2 * (seq->likelihood - nb_transient_parameter - nb_parameter) << endl;
      }

      // AICc, written only if the denominator cumul_length - nb_parameter - 1 is strictly positive

      if (nb_parameter < seq->cumul_length - 1) {
        os << "\n";
        if (file_flag) {
          os << "# ";
        }
        os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS]
           << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << "): "
           << 2 * (seq->likelihood - (double)(nb_parameter * seq->cumul_length) /
              (double)(seq->cumul_length - nb_parameter - 1)) << endl;
      }

      if ((type == ORDINARY) && (nb_transient_parameter > 0) &&
          (nb_transient_parameter + nb_parameter < seq->cumul_length - 1)) {
        if (file_flag) {
          os << "# ";
        }
        os << nb_transient_parameter + nb_parameter << " " << STAT_label[STATL_FREE_PARAMETERS]
           << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << "): "
           << 2 * (seq->likelihood - (double)((nb_transient_parameter + nb_parameter) * seq->cumul_length) /
              (double)(seq->cumul_length - nb_transient_parameter - nb_parameter - 1)) << endl;
      }

      // BIC

      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS]
         << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << "): "
         << 2 * seq->likelihood - nb_parameter * log((double)seq->cumul_length) << endl;

      if ((type == ORDINARY) && (nb_transient_parameter > 0)) {
        if (file_flag) {
          os << "# ";
        }
        os << nb_transient_parameter + nb_parameter << " " << STAT_label[STATL_FREE_PARAMETERS]
           << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << "): "
           << 2 * seq->likelihood - (nb_transient_parameter + nb_parameter) * log((double)seq->cumul_length) << endl;
      }

      // BICc, penalty given by penalty_computation()

      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << nb_parameter + (type == ORDINARY ? nb_transient_parameter : 0) << " " << STAT_label[STATL_FREE_PARAMETERS]
         << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BICc] << "): "
         << 2 * seq->likelihood - penalty_computation(hidden , (hidden ? MIN_PROBABILITY : 0.)) << endl;
    }

    // ICL and ICLc (hidden models only): the sample entropy is subtracted from the log-likelihood

    if ((hidden) && (seq->likelihood != D_INF)) {
      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << nb_parameter << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS]
         << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICL] << "): "
         << 2 * (seq->likelihood - seq->sample_entropy) - nb_parameter * log((double)seq->cumul_length) << endl;

      if ((type == ORDINARY) && (nb_transient_parameter > 0)) {
        if (file_flag) {
          os << "# ";
        }
        os << nb_transient_parameter + nb_parameter << " " << STAT_label[STATL_FREE_PARAMETERS]
           << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICL] << "): "
           << 2 * (seq->likelihood - seq->sample_entropy) - (nb_transient_parameter + nb_parameter) * log((double)seq->cumul_length) << endl;
      }

      // NOTE(review): the singular/plural label is selected on nb_parameter alone while the count
      // written includes the transient parameters (the BICc line always uses the plural) - to be confirmed

      os << "\n";
      if (file_flag) {
        os << "# ";
      }
      os << nb_parameter + (type == ORDINARY ? nb_transient_parameter : 0) << " " << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS]
         << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICLc] << "): "
         << 2 * (seq->likelihood - seq->sample_entropy) - penalty_computation(hidden , MIN_PROBABILITY) << endl;
    }
  }

  // restoration of the adjustment flags saved on entry
  os.setf(format_flags , ios::adjustfield);

  return os;
}


/*--------------------------------------------------------------*/
/**
 *  \brief Writing of a VariableOrderMarkov object.
 *
 *  The included data (markov_data) are passed as associated data structure,
 *  with file_flag set to false.
 *
 *  \param[in,out] os         stream,
 *  \param[in]     exhaustive flag detail level.
 */
/*--------------------------------------------------------------*/

ostream& VariableOrderMarkov::ascii_write(ostream &os , bool exhaustive) const

{
  return ascii_write(os , markov_data , exhaustive , false);
}


/*--------------------------------------------------------------*/
/**
 *  \brief Writing of a VariableOrderMarkov object in a file.
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkov::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + ascii_write(out_file , markov_data , exhaustive , true); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkov object and the associated data structure + * in a file at the spreadsheet format. + * + * \param[in,out] os stream, + * \param[in] seq pointer on a VariableOrderMarkovData object, + * \param[in] hidden flag hidden model. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkov::spreadsheet_write(ostream &os , + const VariableOrderMarkovData *seq , + bool hidden) const + +{ + bool **logic_transition; + int i , j , k; + int variable; + double **distance; + FrequencyDistribution *marginal_dist = NULL , **observation_dist = NULL; + Histogram *marginal_histo = NULL , **observation_histo = NULL; + SequenceCharacteristics *characteristics = NULL; + + + if (hidden) { + switch (type) { + case ORDINARY : + os << SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN] << endl; + break; + case EQUILIBRIUM : + os << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN] << endl; + break; + } + } + + else { + switch (type) { + case ORDINARY : + os << SEQ_word[SEQW_MARKOV_CHAIN] << endl; + break; + case EQUILIBRIUM : + os << SEQ_word[SEQW_EQUILIBRIUM_MARKOV_CHAIN] << endl; + break; + } + } + + // writing of the variable-order Markov chain parameters + + spreadsheet_print(os); + + if ((seq) && (seq->type[0] == STATE)) { + characteristics 
= seq->characteristics[0]; + } + else { + characteristics = NULL; + } + + state_process->spreadsheet_print(os , 0 , NULL , NULL , characteristics); + + // writing of the distributions associated with each observation process + + if (hidden) { + os << "\n" << nb_output_process << "\t" + << STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] << endl; + } + + if (hidden) { + logic_transition = logic_transition_computation(); + + distance = new double*[nb_state]; + for (i = 0;i < nb_state;i++) { + distance[i] = new double[nb_state]; + } + } + + for (i = 0;i < nb_output_process;i++) { + os << "\n" << STAT_word[STATW_OUTPUT_PROCESS]; + + if (hidden) { + os << "\t" << i + 1; + + if (categorical_process[i]) { + os << "\t" << STAT_word[STATW_CATEGORICAL]; + } + else if (discrete_parametric_process[i]) { + os << "\t" << STAT_word[STATW_DISCRETE_PARAMETRIC]; + } + else { + os << "\t" << STAT_word[STATW_CONTINUOUS_PARAMETRIC]; + } + } + os << endl; + + if (seq) { + switch (seq->type[0]) { + case STATE : + variable = i + 1; + break; + default : + variable = i; + break; + } + + if (seq->observation_distribution) { + observation_dist = seq->observation_distribution[variable]; + } + marginal_dist = seq->marginal_distribution[variable]; + + if (seq->observation_histogram) { + observation_histo = seq->observation_histogram[variable]; + } + marginal_histo = seq->marginal_histogram[variable]; + + characteristics = seq->characteristics[variable]; + } + + if (categorical_process[i]) { + categorical_process[i]->spreadsheet_print(os , i + 1 , observation_dist , marginal_dist , + characteristics); + + if (hidden) { + for (j = 0;j < nb_state;j++) { + for (k = j + 1;k < nb_state;k++) { + if ((logic_transition[j][k]) || (logic_transition[k][j])) { + distance[j][k] = categorical_process[i]->observation[j]->overlap_distance_computation(*(categorical_process[i]->observation[k])); + distance[k][j] = distance[j][k]; + } + } + } + } + } + + else if 
(discrete_parametric_process[i]) { + discrete_parametric_process[i]->spreadsheet_print(os , observation_dist , marginal_dist); + + if (hidden) { + for (j = 0;j < nb_state;j++) { + for (k = j + 1;k < nb_state;k++) { + if ((logic_transition[j][k]) || (logic_transition[k][j])) { + distance[j][k] = discrete_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(discrete_parametric_process[i]->observation[k])); + distance[k][j] = distance[j][k]; + } + } + } + } + } + + else { + continuous_parametric_process[i]->spreadsheet_print(os , observation_histo , observation_dist , + marginal_histo , marginal_dist); + + if (hidden) { + for (j = 0;j < nb_state;j++) { + for (k = j + 1;k < nb_state;k++) { + if ((logic_transition[j][k]) || (logic_transition[k][j])) { + distance[j][k] = continuous_parametric_process[i]->observation[j]->sup_norm_distance_computation(*(continuous_parametric_process[i]->observation[k])); + distance[k][j] = distance[j][k]; + } + } + } + } + } + + if (hidden) { + os << "\n" << STAT_label[STATL_CONSECUTIVE_STATE_OBSERVATION_DISTRIBUTION_DISTANCE] << endl; + + for (j = 0;j < nb_state;j++) { + for (k = 0;k < nb_state;k++) { + if ((k != j) && (logic_transition[j][k])) { + os << distance[j][k]; + } + os << "\t"; + } + os << endl; + } + } + } + + if (hidden) { + for (i = 0;i < nb_state;i++) { + delete [] logic_transition[i]; + } + delete [] logic_transition; + + for (i = 0;i < nb_state;i++) { + delete [] distance[i]; + } + delete [] distance; + } + + if (seq) { + int nb_parameter = nb_parameter_computation(hidden ? MIN_PROBABILITY : 0.) 
, nb_transient_parameter; + double information; + + + // writing of the sequence length frequency distribution + + os << "\n" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << "\t"; + seq->length_distribution->spreadsheet_characteristic_print(os); + + os << "\n\t" << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << endl; + seq->length_distribution->spreadsheet_print(os); + + os << "\n" << SEQ_label[SEQL_CUMUL_LENGTH] << "\t" << seq->cumul_length << endl; + + // writing of the information quantity of the observed sequences in the i.i.d. case + + for (i = 0;i < seq->nb_variable;i++) { + if (seq->type[i] == REAL_VALUE) { + break; + } + } + + if (i == seq->nb_variable) { + information = seq->iid_information_computation(); + + os << "\n" << SEQ_label[SEQL_IID_INFORMATION] << "\t" << information << "\t" + << information / seq->cumul_length << endl; + } + + // writing of the (penalized) log-likelihoods of the model for sequences + + if (hidden) { + if (seq->restoration_likelihood != D_INF) { + os << "\n" << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD] << "\t" << seq->restoration_likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << seq->restoration_likelihood / seq->cumul_length << endl; + } + + if (seq->sample_entropy != D_DEFAULT) { + os << "\n" << SEQ_label[SEQL_STATE_SEQUENCE_ENTROPY] << "\t" << seq->sample_entropy << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << seq->sample_entropy / seq->cumul_length << endl; + } + + if (seq->likelihood != D_INF) { + os << "\n" << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD] << "\t" << seq->likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << seq->likelihood / seq->cumul_length << endl; + } + } + + else { + os << "\n" << STAT_label[STATL_LIKELIHOOD] << "\t" << seq->likelihood << "\t" + << STAT_label[STATL_NORMALIZED] << "\t" << seq->likelihood / seq->cumul_length << endl; + } + + if (seq->likelihood != D_INF) { + if (type == ORDINARY) 
{ + nb_transient_parameter = nb_transient_parameter_computation(hidden ? MIN_PROBABILITY : 0.); + + os << "\n" << nb_transient_parameter << "\t" + << SEQ_label[nb_transient_parameter == 1 ? SEQL_FREE_TRANSIENT_PARAMETER : SEQL_FREE_TRANSIENT_PARAMETERS] << endl; + } + + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << ")\t" + << 2 * (seq->likelihood - nb_parameter) << endl; + if ((type == ORDINARY) && (nb_transient_parameter > 0)) { + os << nb_transient_parameter + nb_parameter << "\t" << STAT_label[STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AIC] << ")\t" + << 2 * (seq->likelihood - nb_transient_parameter - nb_parameter) << endl; + } + + if (nb_parameter < seq->cumul_length - 1) { + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << ")\t" + << 2 * (seq->likelihood - (double)(nb_parameter * seq->cumul_length) / + (double)(seq->cumul_length - nb_parameter - 1)) << endl; + } + if ((type == ORDINARY) && (nb_transient_parameter > 0) && + (nb_transient_parameter + nb_parameter < seq->cumul_length - 1)) { + os << nb_transient_parameter + nb_parameter << "\t" << STAT_label[STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[AICc] << ")\t" + << 2 * (seq->likelihood - (double)((nb_transient_parameter + nb_parameter) * seq->cumul_length) / + (double)(seq->cumul_length - nb_transient_parameter - nb_parameter - 1)) << endl; + } + + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? 
STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << ")\t" + << 2 * seq->likelihood - nb_parameter * log((double)seq->cumul_length) << endl; + if ((type == ORDINARY) && (nb_transient_parameter > 0)) { + os << nb_transient_parameter + nb_parameter << "\t" << STAT_label[STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BIC] << ")\t" + << 2 * seq->likelihood - (nb_transient_parameter + nb_parameter) * log((double)seq->cumul_length) << endl; + } + + os << "\n" << nb_parameter + (type == ORDINARY ? nb_transient_parameter : 0) << "\t" << STAT_label[STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[BICc] << ")\t" + << 2 * seq->likelihood - penalty_computation(hidden , (hidden ? MIN_PROBABILITY : 0.)) << endl; + } + + if ((hidden) && (seq->likelihood != D_INF)) { + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICL] << ")\t" + << 2 * (seq->likelihood - seq->sample_entropy) - nb_parameter * log((double)seq->cumul_length) << endl; + if ((type == ORDINARY) && (nb_transient_parameter > 0)) { + os << nb_transient_parameter + nb_parameter << "\t" << STAT_label[STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICL] << ")\t" + << 2 * (seq->likelihood - seq->sample_entropy) - (nb_transient_parameter + nb_parameter) * log((double)seq->cumul_length) << endl; + } + + os << "\n" << nb_parameter << "\t" << STAT_label[nb_parameter == 1 ? 
STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] << "\t" + << "2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" << STAT_criterion_word[ICLc] << ")\t" + << 2 * (seq->likelihood - seq->sample_entropy) - penalty_computation(hidden , MIN_PROBABILITY) << endl; + } + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkov object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkov::spreadsheet_write(StatError &error , const string path) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + spreadsheet_write(out_file , markov_data); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a VariableOrderMarkov object and the associated data structure + * using Gnuplot. + * + * \param[in] prefix file prefix, + * \param[in] title figure title, + * \param[in] seq pointer on a VariableOrderMarkovData object. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkov::plot_write(const char *prefix , const char *title , + const VariableOrderMarkovData *seq) const + +{ + bool status; + int i; + int variable , nb_value = I_DEFAULT; + double *empirical_cdf[2]; + FrequencyDistribution *length_distribution = NULL , *marginal_dist = NULL , **observation_dist = NULL; + Histogram *marginal_histo = NULL , **observation_histo = NULL; + SequenceCharacteristics *characteristics = NULL; + + + if ((seq) && (seq->type[0] == STATE)) { + characteristics = seq->characteristics[0]; + length_distribution = seq->length_distribution; + } + else { + characteristics = NULL; + } + + status = state_process->plot_print(prefix , title , 0 , NULL , NULL , + characteristics , length_distribution); + + if (status) { + if (seq) { + length_distribution = seq->length_distribution; + } + + for (i = 0;i < nb_output_process;i++) { + if (seq) { + switch (seq->type[0]) { + case STATE : + variable = i + 1; + break; + default : + variable = i; + break; + } + + if (seq->observation_distribution) { + observation_dist = seq->observation_distribution[variable]; + } + marginal_dist = seq->marginal_distribution[variable]; + + if (seq->observation_histogram) { + observation_histo = seq->observation_histogram[variable]; + } + marginal_histo = seq->marginal_histogram[variable]; + + characteristics = seq->characteristics[variable]; + + if (continuous_parametric_process[i]) { + nb_value = seq->cumulative_distribution_function_computation(variable , empirical_cdf); + } + } + + if (categorical_process[i]) { + categorical_process[i]->plot_print(prefix , title , i + 1 , observation_dist , + marginal_dist , characteristics , + length_distribution); + } + else if (discrete_parametric_process[i]) { + discrete_parametric_process[i]->plot_print(prefix , title , i + 1 , observation_dist , + marginal_dist); + } + else { + continuous_parametric_process[i]->plot_print(prefix , title , i + 1 , 
+ observation_histo , observation_dist , + marginal_histo , marginal_dist , + nb_value , (seq ? empirical_cdf : NULL)); + if (seq) { + delete [] empirical_cdf[0]; + delete [] empirical_cdf[1]; + } + } + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a VariableOrderMarkov object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkov::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status = plot_write(prefix , title , markov_data); + + error.init(); + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a VariableOrderMarkov object and the associated data structure. + * + * \param[in] seq pointer on a VariableOrderMarkovData object. + * + * \return MultiPlotSet object. 
+ */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* VariableOrderMarkov::get_plotable(const VariableOrderMarkovData *seq) const + +{ + int i , j; + int nb_plot_set , index_length , index , variable; + FrequencyDistribution *length_distribution = NULL , *marginal_dist = NULL , **observation_dist = NULL; + Histogram *marginal_histo = NULL , **observation_histo = NULL; + SequenceCharacteristics *characteristics = NULL; + MultiPlotSet *plot_set; + + + if ((seq) && (seq->type[0] == STATE)) { + characteristics = seq->characteristics[0]; + } + else { + characteristics = NULL; + } + + // computation of the number of plots + + nb_plot_set = 0; + + if ((state_process->index_value) || (characteristics)) { + nb_plot_set++; + + if (characteristics) { + index_length = characteristics->index_value->plot_length_computation(); + + if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + nb_plot_set++; + } + nb_plot_set++; + } + } + + if ((state_process->first_occurrence) || (characteristics)) { + for (i = 0;i < nb_state;i++) { + if ((state_process->first_occurrence) && + (state_process->first_occurrence[i])) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->first_occurrence[i]->nb_element > 0)) { + nb_plot_set++; + } + } + } + + if ((state_process->recurrence_time) || (characteristics)) { + for (i = 0;i < nb_state;i++) { + if ((state_process->recurrence_time) && + (state_process->recurrence_time[i])) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[i]->nb_element > 0)) { + nb_plot_set++; + } + } + } + + if ((state_process->sojourn_time) || (characteristics)) { + for (i = 0;i < nb_state;i++) { + if ((state_process->sojourn_time) && + (state_process->sojourn_time[i])) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + 
(characteristics->sojourn_time[i]->nb_element > 0)) { + nb_plot_set++; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->initial_run) && + (characteristics->initial_run[i]->nb_element > 0)) { + nb_plot_set++; + } + + if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->final_run[i]->nb_element > 0)) { + nb_plot_set++; + } + } + } + + if ((state_process->nb_run) || (state_process->nb_occurrence) || + ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) { + for (i = 0;i < nb_state;i++) { + if (state_process->nb_run) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_run) && (characteristics->nb_run[i]->nb_element > 0)) { + nb_plot_set++; + } + + if (state_process->nb_occurrence) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[i]->nb_element > 0)) { + nb_plot_set++; + } + } + + if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) { + nb_plot_set++; + } + } + + for (i = 0;i < nb_output_process;i++) { + if (seq) { + switch (seq->type[0]) { + case STATE : + variable = i + 1; + break; + default : + variable = i; + break; + } + + characteristics = seq->characteristics[variable]; + } + + if (categorical_process[i]) { + if ((categorical_process[i]->index_value) || (characteristics)) { + nb_plot_set++; + + if (characteristics) { + index_length = characteristics->index_value->plot_length_computation(); + + if (characteristics->index_value->frequency[index_length - 1] < MAX_FREQUENCY) { + nb_plot_set++; + } + nb_plot_set++; + } + } + + if ((categorical_process[i]->first_occurrence) || (characteristics)) { + for (j = 0;j < categorical_process[i]->nb_value;j++) { + if ((categorical_process[i]->first_occurrence) && + (categorical_process[i]->first_occurrence[j])) { + 
nb_plot_set++; + } + else if ((characteristics) && (j < characteristics->nb_value) && + (characteristics->first_occurrence[j]->nb_element > 0)) { + nb_plot_set++; + } + } + } + + if ((categorical_process[i]->recurrence_time) || (characteristics)) { + for (j = 0;j < categorical_process[i]->nb_value;j++) { + if ((categorical_process[i]->recurrence_time) && + (categorical_process[i]->recurrence_time[j])) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->recurrence_time[j]->nb_element > 0)) { + nb_plot_set++; + } + } + } + + if ((categorical_process[i]->sojourn_time) || (characteristics)) { + for (j = 0;j < categorical_process[i]->nb_value;j++) { + if ((categorical_process[i]->sojourn_time) && + (categorical_process[i]->sojourn_time[j])) { + nb_plot_set++; + } + else if ((characteristics) && (i < characteristics->nb_value) && + (characteristics->sojourn_time[j]->nb_element > 0)) { + nb_plot_set++; + } + +/* if ((characteristics) && (j < characteristics->nb_value) && + (characteristics->initial_run) && + (characteristics->initial_run[j]->nb_element > 0)) { + nb_plot_set++; + } */ + + if ((characteristics) && (j < characteristics->nb_value) && + (characteristics->final_run[j]->nb_element > 0)) { + nb_plot_set++; + } + } + } + + if ((categorical_process[i]->nb_run) || (categorical_process[i]->nb_occurrence) || + ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence))) { + for (j = 0;j < categorical_process[i]->nb_value;j++) { + if (categorical_process[i]->nb_run) { + nb_plot_set++; + } + else if ((characteristics) && (j < characteristics->nb_value) && + (characteristics->nb_run) && (characteristics->nb_run[j]->nb_element > 0)) { + nb_plot_set++; + } + + if (categorical_process[i]->nb_occurrence) { + nb_plot_set++; + } + else if ((characteristics) && (j < characteristics->nb_value) && + (characteristics->nb_occurrence) && + (characteristics->nb_occurrence[j]->nb_element > 0)) { + 
nb_plot_set++; + } + } + + if ((characteristics) && (characteristics->nb_run) && (characteristics->nb_occurrence)) { + nb_plot_set++; + } + } + } + + if ((seq->observation_distribution) || (seq->observation_histogram)) { + nb_plot_set += nb_state; + } + else { + nb_plot_set++; + } + + if ((categorical_process[i]) && (seq->marginal_distribution[variable])) { + if ((categorical_process[i]->weight) && + (categorical_process[i]->mixture)) { + nb_plot_set++; + } + if ((categorical_process[i]->restoration_weight) && + (categorical_process[i]->restoration_mixture)) { + nb_plot_set++; + } + } + + if ((discrete_parametric_process[i]) && (seq->marginal_distribution[variable])) { + if ((discrete_parametric_process[i]->weight) && + (discrete_parametric_process[i]->mixture)) { + nb_plot_set += 2; + } + if ((discrete_parametric_process[i]->restoration_weight) && + (discrete_parametric_process[i]->restoration_mixture)) { + nb_plot_set += 2; + } + } + + if ((continuous_parametric_process[i]) && ((seq->marginal_histogram[variable]) || + (seq->marginal_distribution[variable]))) { + if (continuous_parametric_process[i]->weight) { + nb_plot_set += 2; + } + if (continuous_parametric_process[i]->restoration_weight) { + nb_plot_set += 2; + } + } + } + + plot_set = new MultiPlotSet(nb_plot_set , nb_output_process + 1); + plot_set->border = "15 lw 0"; + + if ((seq) && (seq->type[0] == STATE)) { + characteristics = seq->characteristics[0]; + length_distribution = seq->length_distribution; + } + else { + characteristics = NULL; + } + + index = 0; + plot_set->variable_nb_viewpoint[0] = 0; + state_process->plotable_write(*plot_set , index , 0 , NULL , NULL , characteristics , + length_distribution); + + if (seq) { + length_distribution = seq->length_distribution; + } + + for (i = 0;i < nb_output_process;i++) { + if (seq) { + switch (seq->type[0]) { + case STATE : + variable = i + 1; + break; + default : + variable = i; + break; + } + + if (seq->observation_distribution) { + observation_dist = 
seq->observation_distribution[variable]; + } + marginal_dist = seq->marginal_distribution[variable]; + + if (seq->observation_histogram) { + observation_histo = seq->observation_histogram[variable]; + } + marginal_histo = seq->marginal_histogram[variable]; + + characteristics = seq->characteristics[variable]; + } + + if (categorical_process[i]) { + plot_set->variable_nb_viewpoint[i] = 0; + categorical_process[i]->plotable_write(*plot_set , index , i + 1 , observation_dist , + marginal_dist , characteristics , + length_distribution); + } + else if (discrete_parametric_process[i]){ + discrete_parametric_process[i]->plotable_write(*plot_set , index , i + 1 , observation_dist , + marginal_dist); + } + else { + continuous_parametric_process[i]->plotable_write(*plot_set , index , i + 1 , + observation_histo , observation_dist , + marginal_histo , marginal_dist); + } + } + + return plot_set; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a VariableOrderMarkov object. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* VariableOrderMarkov::get_plotable() const + +{ + return get_plotable(markov_data); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the number of parameters of a VariableOrderMarkov object. + * + * \param[in] min_probability minimum probability. + * + * \return number of parameters. 
+ */ +/*--------------------------------------------------------------*/ + +int VariableOrderMarkov::nb_parameter_computation(double min_probability) const + +{ + int i; + int nb_parameter = VariableOrderMarkovChain::nb_parameter_computation(min_probability); + + + for (i = 0;i < nb_output_process;i++) { + if (categorical_process[i]) { + nb_parameter += categorical_process[i]->nb_parameter_computation(min_probability); + } + else if (discrete_parametric_process[i]) { + nb_parameter += discrete_parametric_process[i]->nb_parameter_computation(); + } + else { + nb_parameter += continuous_parametric_process[i]->nb_parameter_computation(); + } + } + + return nb_parameter; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of an adaptative penalty. + * + * \param[in] hidden flag hidden model, + * \param[in] min_probability minimum probability. + * + * \return adaptative penalty. + */ +/*--------------------------------------------------------------*/ + +double VariableOrderMarkov::penalty_computation(bool hidden , double min_probability) const + +{ + int i , j , k; + int nb_parameter , sample_size; + double sum , *state_marginal , *memory; + double penalty = 0.; + + + if (markov_data) { + if (hidden) { + switch (type) { + + case ORDINARY : { + memory = memory_computation(); + + sum = 0.; + for (i = 1;i < nb_row;i++) { +// if (memo_type[i] == TERMINAL) { + sum += memory[i]; +// } + } + for (i = 1;i < nb_row;i++) { +// if (memo_type[i] == TERMINAL) { + memory[i] /= sum; +// } + } + break; + } + + case EQUILIBRIUM : { + memory = new double[nb_row]; + for (i = 1;i < nb_row;i++) { + memory[i] = initial[i]; + } + break; + } + } + + state_marginal = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + state_marginal[i] = 0.; + } + for (i = 1;i < nb_row;i++) { +// if (memo_type[i] == TERMINAL) { + state_marginal[state[i][0]] += memory[i]; +// } + } + } + + for (i = 1;i < nb_row;i++) { +// if (memo_type[i] == TERMINAL) { + 
if ((memo_type[i] == TERMINAL) || ((type == ORDINARY) && + (memo_type[i] == NON_TERMINAL))) { + nb_parameter = 0; + if (!hidden) { + sample_size = 0; + } + for (j = 0;j < nb_state;j++) { + if (transition[i][j] > min_probability) { + nb_parameter++; + if (!hidden) { + sample_size += markov_data->chain_data->transition[i][j]; + } + } + } + + nb_parameter--; + + if (nb_parameter > 0) { + if (hidden) { + if (memory[i] > 0.) { + penalty += nb_parameter * log(memory[i] * markov_data->cumul_length); + } + } + else { + if (sample_size > 0) { + penalty += nb_parameter * log((double)sample_size); + } + } + } + } + } + + for (i = 0;i < nb_output_process;i++) { + if (categorical_process[i]) { + for (j = 0;j < nb_state;j++) { + nb_parameter = 0; + for (k = 0;k < categorical_process[i]->nb_value;k++) { + if (categorical_process[i]->observation[j]->mass[k] > min_probability) { + nb_parameter++; + } + } + + nb_parameter--; + + if (nb_parameter > 0) { + if (hidden) { + penalty += nb_parameter * log(state_marginal[j] * markov_data->cumul_length); + } + else { + penalty += nb_parameter * + log((double)markov_data->marginal_distribution[0]->frequency[j]); + } + } + } + } + + else if (discrete_parametric_process[i]) { + for (j = 0;j < nb_state;j++) { + nb_parameter = discrete_parametric_process[i]->observation[j]->nb_parameter_computation(); + + if (hidden) { + penalty += nb_parameter * log(state_marginal[j] * markov_data->cumul_length); + } + else { + penalty += nb_parameter * + log((double)markov_data->marginal_distribution[0]->frequency[j]); + } + } + } + + else { + for (j = 0;j < nb_state;j++) { + nb_parameter = continuous_parametric_process[i]->observation[j]->nb_parameter_computation(); + + if (hidden) { + penalty += nb_parameter * log(state_marginal[j] * markov_data->cumul_length); + } + else { + penalty += nb_parameter * + log((double)markov_data->marginal_distribution[0]->frequency[j]); + } + } + } + } + + if (hidden) { + delete [] memory; + delete [] state_marginal; + } + } + 
+ return penalty; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Default constructor of the VariableOrderMarkovData class. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData::VariableOrderMarkovData() + +{ + markov = NULL; + chain_data = NULL; + + likelihood = D_INF; + restoration_likelihood = D_INF; + sample_entropy = D_DEFAULT; + + posterior_probability = NULL; + entropy = NULL; + nb_state_sequence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkovData class. + * + * \param[in] ilength_distribution sequence length frequency distribution, + * \param[in] inb_variable number of variables, + * \param[in] itype variable types, + * \param[in] init_flag flag initialization. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData::VariableOrderMarkovData(const FrequencyDistribution &ilength_distribution , + int inb_variable , variable_nature *itype , bool init_flag) +:MarkovianSequences(ilength_distribution , inb_variable , itype , init_flag) + +{ + markov = NULL; + chain_data = NULL; + + likelihood = D_INF; + restoration_likelihood = D_INF; + sample_entropy = D_DEFAULT; + + posterior_probability = NULL; + entropy = NULL; + nb_state_sequence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a VariableOrderMarkovData object from + * a MarkovianSequences object adding a state variable. + * + * \param[in] seq reference on a MarkovianSequences object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData::VariableOrderMarkovData(const MarkovianSequences &seq) +:MarkovianSequences(seq , ADD_STATE_VARIABLE , UNCHANGED) + +{ + markov = NULL; + chain_data = NULL; + + likelihood = D_INF; + restoration_likelihood = D_INF; + sample_entropy = D_DEFAULT; + + posterior_probability = NULL; + entropy = NULL; + nb_state_sequence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of a VariableOrderMarkovData object from + * a MarkovianSequences object. + * + * \param[in] seq reference on a MarkovianSequences object, + * \param[in] transform type of transform (SEQUENCE_COPY/ADD_STATE_VARIABLE), + * \param[in] initial_run_flag addition/removing of the initial run length frequency distributions. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData::VariableOrderMarkovData(const MarkovianSequences &seq , + sequence_transformation transform , bool initial_run_flag) +:MarkovianSequences(seq , transform , (initial_run_flag ? ADD_INITIAL_RUN : REMOVE_INITIAL_RUN)) + +{ + markov = NULL; + chain_data = NULL; + + likelihood = D_INF; + restoration_likelihood = D_INF; + sample_entropy = D_DEFAULT; + + posterior_probability = NULL; + entropy = NULL; + nb_state_sequence = NULL; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a VariableOrderMarkovData object. + * + * \param[in] seq reference on a VariableOrderMarkovData object, + * \param[in] model_flag flag copy of the included VariableOrderMarkov object. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovData::copy(const VariableOrderMarkovData &seq , + bool model_flag) + +{ + int i; + + + if ((model_flag) && (seq.markov)) { + markov = new VariableOrderMarkov(*(seq.markov) , false); + } + else { + markov = NULL; + } + + if (seq.chain_data) { + chain_data = new VariableOrderMarkovChainData(*(seq.chain_data)); + } + else { + chain_data = NULL; + } + + likelihood = seq.likelihood; + restoration_likelihood = seq.restoration_likelihood; + sample_entropy = seq.sample_entropy; + + if (seq.posterior_probability) { + posterior_probability = new double[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + posterior_probability[i] = seq.posterior_probability[i]; + } + } + else { + posterior_probability = NULL; + } + + if (seq.entropy) { + entropy = new double[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + entropy[i] = seq.entropy[i]; + } + } + else { + entropy = NULL; + } + + if (seq.nb_state_sequence) { + nb_state_sequence = new double[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + nb_state_sequence[i] = seq.nb_state_sequence[i]; + } + } + else { + nb_state_sequence = NULL; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Destructor of the VariableOrderMarkovData class. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData::~VariableOrderMarkovData() + +{ + delete markov; + delete chain_data; + + delete [] posterior_probability; + delete [] entropy; + delete [] nb_state_sequence; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Assignment operator of the VariableOrderMarkovData class. + * + * \param[in] seq reference on a VariableOrderMarkovData object. + * + * \return VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData& VariableOrderMarkovData::operator=(const VariableOrderMarkovData &seq) + +{ + if (&seq != this) { + delete markov; + delete chain_data; + + delete [] posterior_probability; + delete [] entropy; + delete [] nb_state_sequence; + + remove(); + Sequences::remove(); + + Sequences::copy(seq); + MarkovianSequences::copy(seq); + copy(seq); + } + + return *this; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Extraction of a frequency distribution. + * + * \param[in] error reference on a StatError object, + * \param[in] histo_type frequency distribution type, + * \param[in] variable variable index, + * \param[in] value state or observation. + * + * \return DiscreteDistributionData object. + */ +/*--------------------------------------------------------------*/ + +DiscreteDistributionData* VariableOrderMarkovData::extract(StatError &error , process_distribution histo_type , + int variable , int value) const + +{ + bool status = true; + Distribution *pdist; + DiscreteParametric *pparam; + FrequencyDistribution *phisto; + DiscreteDistributionData *histo; + CategoricalSequenceProcess *process; + + + histo = NULL; + error.init(); + + if (histo_type == OBSERVATION) { + if ((variable < 2) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if ((value < 0) || (value >= marginal_distribution[0]->nb_value)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << value << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + if (!observation_distribution[variable]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + 
error.correction_update((error_message.str()).c_str() , STAT_variable_word[INT_VALUE]); + } + + else { + phisto = observation_distribution[variable][value]; + + if (phisto->nb_element == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + } + } + } + } + + else { + if ((variable < 1) || (variable > nb_variable)) { + status = false; + error.update(STAT_error[STATR_VARIABLE_INDEX]); + } + + else { + variable--; + + if (!characteristics[variable]) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << variable + 1 << ": " + << SEQ_error[SEQR_CHARACTERISTICS_NOT_COMPUTED]; + error.update((error_message.str()).c_str()); + } + + else if ((value < 0) || (value >= marginal_distribution[variable]->nb_value)) { + status = false; + ostringstream error_message; + error_message << STAT_label[variable == 0 ? STATL_STATE : STATL_OUTPUT] << " " + << value << " " << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + switch (histo_type) { + case FIRST_OCCURRENCE : + phisto = characteristics[variable]->first_occurrence[value]; + break; + case RECURRENCE_TIME : + phisto = characteristics[variable]->recurrence_time[value]; + break; + case SOJOURN_TIME : + phisto = characteristics[variable]->sojourn_time[value]; + break; + case FINAL_RUN : + phisto = characteristics[variable]->final_run[value]; + break; + case NB_RUN : + phisto = characteristics[variable]->nb_run[value]; + break; + case NB_OCCURRENCE : + phisto = characteristics[variable]->nb_occurrence[value]; + break; + } + + if (phisto->nb_element == 0) { + status = false; + error.update(STAT_error[STATR_EMPTY_SAMPLE]); + } + } + } + } + + if (status) { + if (variable == 0) { + process = markov->state_process; + } + else { + process = markov->categorical_process[variable - 1]; + } + + pdist = NULL; + pparam = NULL; + + switch (histo_type) { + + case OBSERVATION : { + if (markov->categorical_process[variable - 1]) { 
+ pdist = markov->categorical_process[variable - 1]->observation[value]; + } + else if (markov->discrete_parametric_process[variable - 1]) { + pparam = markov->discrete_parametric_process[variable - 1]->observation[value]; + } + break; + } + + case FIRST_OCCURRENCE : { + pdist = process->first_occurrence[value]; + break; + } + + case RECURRENCE_TIME : { + pdist = process->recurrence_time[value]; + break; + } + + case SOJOURN_TIME : { + pparam = process->sojourn_time[value]; + break; + } + + case NB_RUN : { + pdist = process->nb_run[value]; + break; + } + + case NB_OCCURRENCE : { + pdist = process->nb_occurrence[value]; + break; + } + } + + if (pdist) { + histo = new DiscreteDistributionData(*phisto , pdist); + } + else { + histo = new DiscreteDistributionData(*phisto , pparam); + } + } + + return histo; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Copy of a VariableOrderMarkovData object transforming + * the implicit index parameters in explicit index parameters. + * + * \param[in] error reference on a StatError object. + * + * \return VariableOrderMarkovData object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* VariableOrderMarkovData::explicit_index_parameter(StatError &error) const + +{ + VariableOrderMarkovData *seq; + + + error.init(); + + if (index_parameter) { + seq = NULL; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + else { + seq = new VariableOrderMarkovData(*this , true , EXPLICIT_INDEX_PARAMETER); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Removing of the index parameters. + * + * \param[in] error reference on a StatError object. + * + * \return VariableOrderMarkovData object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* VariableOrderMarkovData::remove_index_parameter(StatError &error) const + +{ + VariableOrderMarkovData *seq; + + + error.init(); + + if (!index_parameter) { + seq = NULL; + error.update(SEQ_error[SEQR_INDEX_PARAMETER_TYPE]); + } + else { + seq = new VariableOrderMarkovData(*this , true , REMOVE_INDEX_PARAMETER); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of the auxiliary variables corresponding to + * the restored state sequences. + * + * \param[in] error reference on a StatError object. + * + * \return VariableOrderMarkovData object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* VariableOrderMarkovData::build_auxiliary_variable(StatError &error) const + +{ + bool status = true; + int i; + MarkovianSequences *seq; + + + seq = NULL; + error.init(); + + if (type[0] != STATE) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " 1: " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[STATE]); + } + + for (i = 0;i < markov->nb_output_process;i++) { + if (((markov->discrete_parametric_process) && (markov->discrete_parametric_process[i])) || + ((markov->continuous_parametric_process) && (markov->continuous_parametric_process[i]))) { + break; + } + } + + if (i == markov->nb_output_process) { + status = false; + error.update(SEQ_error[SEQR_PARAMETRIC_PROCESS]); + } + + if (status) { + seq = MarkovianSequences::build_auxiliary_variable(markov->discrete_parametric_process , + markov->continuous_parametric_process); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Building of residual sequences on the basis of restored state sequences. 
+ * + * \param[in] error reference on a StatError object. + * + * \return MarkovianSequences object. + */ +/*--------------------------------------------------------------*/ + +MarkovianSequences* VariableOrderMarkovData::residual_sequences(StatError &error) const + +{ + MarkovianSequences *seq; + + + error.init(); + + if (type[0] != STATE) { + seq = NULL; + + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " 1: " + << STAT_error[STATR_VARIABLE_TYPE]; + error.correction_update((error_message.str()).c_str() , STAT_variable_word[STATE]); + } + + else { + seq = MarkovianSequences::residual_sequences(markov->categorical_process , + markov->discrete_parametric_process , + markov->continuous_parametric_process); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkovData object. + * + * \param[in,out] os stream, + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkovData::ascii_write(ostream &os , bool exhaustive) const + +{ + if (markov) { + markov->ascii_write(os , this , exhaustive , false , + CategoricalSequenceProcess::test_hidden(markov->nb_output_process , markov->categorical_process)); + } + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkovData object in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] exhaustive flag detail level. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkovData::ascii_write(StatError &error , const string path , + bool exhaustive) const + +{ + bool status = false; + + + if (markov) { + ofstream out_file(path.c_str()); + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + markov->ascii_write(out_file , this , exhaustive , true , + CategoricalSequenceProcess::test_hidden(markov->nb_output_process , markov->categorical_process)); + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkovData object. + * + * \param[in,out] os stream, + * \param[in] format format (line/column), + * \param[in] exhaustive flag detail level. + */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkovData::ascii_data_write(ostream &os , output_sequence_format format , + bool exhaustive) const + +{ + MarkovianSequences::ascii_write(os , exhaustive , false); + ascii_print(os , format , false , posterior_probability , entropy , nb_state_sequence); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkovData object. + * + * \param[in] format format (line/column), + * \param[in] exhaustive flag detail level, + * + * \return string. + */ +/*--------------------------------------------------------------*/ + +string VariableOrderMarkovData::ascii_data_write(output_sequence_format format , bool exhaustive) const + +{ + ostringstream oss; + + + ascii_data_write(oss , format , exhaustive); + + return oss.str(); +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkovData object in a file. 
+ * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] format format (line/column), + * \param[in] exhaustive flag detail level. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkovData::ascii_data_write(StatError &error , const string path , + output_sequence_format format , bool exhaustive) const + +{ + bool status = false; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + if (format != 'a') { + MarkovianSequences::ascii_write(out_file , exhaustive , true); + } + ascii_print(out_file , format , true , posterior_probability , entropy , nb_state_sequence); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of a VariableOrderMarkovData object in a file at the spreadsheet format. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path. + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkovData::spreadsheet_write(StatError &error , const string path) const + +{ + bool status = false; + + + if (markov) { + ofstream out_file(path.c_str()); + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + markov->spreadsheet_write(out_file , this , + CategoricalSequenceProcess::test_hidden(markov->nb_output_process , markov->categorical_process)); + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a VariableOrderMarkovData object using Gnuplot. + * + * \param[in] error reference on a StatError object, + * \param[in] prefix file prefix, + * \param[in] title figure title. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkovData::plot_write(StatError &error , const char *prefix , + const char *title) const + +{ + bool status = false; + + + if (markov) { + status = markov->plot_write(prefix , title , this); + + error.init(); + + if (!status) { + error.update(STAT_error[STATR_FILE_PREFIX]); + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Plot of a VariableOrderMarkovData object. + * + * \return MultiPlotSet object. + */ +/*--------------------------------------------------------------*/ + +MultiPlotSet* VariableOrderMarkovData::get_plotable() const + +{ + MultiPlotSet *plot_set; + + + if (markov) { + plot_set = markov->get_plotable(this); + } + else { + plot_set = NULL; + } + + return plot_set; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/variable_order_markov.h b/src/cpp/sequence_analysis/variable_order_markov.h new file mode 100644 index 0000000..40fe49e --- /dev/null +++ b/src/cpp/sequence_analysis/variable_order_markov.h @@ -0,0 +1,461 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#ifndef VARIABLE_ORDER_MARKOV_H +#define VARIABLE_ORDER_MARKOV_H + + +#include "sequences.h" + + +namespace Stat_trees { + class MarkovOutTree; + MarkovOutTree* markov_out_tree_parsing(StatError& error, + std::ifstream &in_file, + int &line); +}; + + +namespace sequence_analysis { + + + +/**************************************************************** + * + * Constants + */ + + + const int MAX_LAG = 100; // maximum lag for the computation of the autocorrelation coefficients + const int MEMORY_MIN_COUNT = 10; // minimum count for comparing a memory and its children + const double LAPLACE_COEFF = 1.; // Laplace estimator coefficient + + enum memory_type { + NON_TERMINAL , + TERMINAL , + COMPLETION , + PRUNED + }; + + + +/**************************************************************** + * + * Class definition + */ + + + /// \brief Variable-order Markov chain + + class VariableOrderMarkovChain : public stat_tool::Chain { + + public : + + memory_type *memo_type; ///< memory types (NON_TERMINAL/TERMINAL/COMPLETION) + int *order; ///< memory orders + int max_order; ///< maximum memory order + int **state; ///< state succession for each memory + int *parent; ///< parent memories + int **child; ///< child memories + int **next; ///< next memories + int *nb_memory; ///< number of previous memories + int **previous; ///< previous memories + CategoricalSequenceProcess *state_process; ///< 
state process + + void memory_tree_completion(const VariableOrderMarkovChain &markov); + void build(const VariableOrderMarkovChain &markov); + void copy(const VariableOrderMarkovChain &markov); + void remove(); + + VariableOrderMarkovChain(); + VariableOrderMarkovChain(stat_tool::process_type itype , int inb_state , int inb_row); + VariableOrderMarkovChain(stat_tool::process_type itype , int inb_state , int inb_row , int imax_order); + VariableOrderMarkovChain(stat_tool::process_type itype , int inb_state , int iorder , bool init_flag); + VariableOrderMarkovChain(const VariableOrderMarkovChain &markov) + :Chain(markov) { copy(markov); } + ~VariableOrderMarkovChain(); + VariableOrderMarkovChain& operator=(const VariableOrderMarkovChain &markov); + + void find_parent_memory(int index); + void build_memory_transition(); + void build_previous_memory(); + bool check_free_suffix() const; + bool** logic_transition_computation() const; + void component_computation(); + + void build_non_terminal(); + + void thresholding(double min_probability); + + void max_order_computation(); + int nb_parameter_computation(double min_probability = 0.) const; + int nb_transient_parameter_computation(double min_probability = 0.) 
const; + + static VariableOrderMarkovChain* parsing(stat_tool::StatError &error , std::ifstream &in_file , + int &line , stat_tool::process_type type); + + std::ostream& ascii_memory_tree_print(std::ostream &os , bool file_flag = false) const; + std::ostream& ascii_transition_tree_print(std::ostream &os , bool file_flag = false) const; + + std::ostream& ascii_print(std::ostream &os , bool file_flag = false) const; + std::ostream& spreadsheet_print(std::ostream &os) const; + + void non_terminal_transition_probability_computation(); + void initial_probability_computation(); + + void index_state_distribution(); + double* memory_computation() const; + void state_no_occurrence_probability(int istate , double increment = LEAVE_INCREMENT); + void state_first_occurrence_distribution(int istate , int min_nb_value = 1 , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + void state_leave_probability(const double *imemory , int istate , + double increment = LEAVE_INCREMENT); + void state_recurrence_time_distribution(const double *imemory , int istate , + int min_nb_value = 1 , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + void state_sojourn_time_distribution(const double *imemory , int istate , + int min_nb_value = 1 , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + void state_nb_pattern_mixture(int istate , stat_tool::count_pattern pattern); + + Correlation* state_autocorrelation_computation(stat_tool::StatError &error , + int istate , int max_lag , + const MarkovianSequences *seq) const; + }; + + + class VariableOrderMarkovChainData; + class VariableOrderMarkovData; + + /// \brief Variable-order Markov chain + + class VariableOrderMarkov : public stat_tool::StatInterface , protected VariableOrderMarkovChain { + + friend class MarkovianSequences; + friend class VariableOrderMarkovIterator; + friend class VariableOrderMarkovChainData; + friend class VariableOrderMarkovData; + friend class Stat_trees::MarkovOutTree; // to be reworked with J.B. 
+ + friend Stat_trees::MarkovOutTree* Stat_trees::markov_out_tree_parsing(stat_tool::StatError& error, // to be reworked with J.B. + std::ifstream &in_file, int &line); + + friend std::ostream& operator<<(std::ostream &os , const VariableOrderMarkov &markov) + { return markov.ascii_write(os); } + + protected : + + int nb_iterator; ///< number of iterators pointing on the VariableOrderMarkov object + VariableOrderMarkovData *markov_data; ///< pointer on a VariableOrderMarkovData object + int nb_output_process; ///< number of observation processes + CategoricalSequenceProcess **categorical_process; ///< categorical observation processes + stat_tool::DiscreteParametricProcess **discrete_parametric_process; ///< discrete parametric observation processes + stat_tool::ContinuousParametricProcess **continuous_parametric_process; ///< continuous parametric observation processes + + VariableOrderMarkov(const VariableOrderMarkovChain *pmarkov , int inb_output_process , + stat_tool::CategoricalProcess **categorical_observation , + stat_tool::DiscreteParametricProcess **discrete_parametric_observation , + stat_tool::ContinuousParametricProcess **continuous_parametric_observation , + int length); + + void copy(const VariableOrderMarkov &markov , bool data_flag = true); + void remove(); + + std::ostream& ascii_write(std::ostream &os , const VariableOrderMarkovData *seq , + bool exhaustive = false , bool file_flag = false , + bool hidden = false) const; + std::ostream& spreadsheet_write(std::ostream &os , const VariableOrderMarkovData *seq , + bool hidden = false) const; + bool plot_write(const char *prefix , const char *title , + const VariableOrderMarkovData *seq) const; + stat_tool::MultiPlotSet* get_plotable(const VariableOrderMarkovData *seq) const; + + int nb_parameter_computation(double min_probability = 0.) const; + double penalty_computation(bool hidden , double min_probability = 0.) 
const; + + void index_output_distribution(int variable); + void output_no_occurrence_probability(int variable , int output , + double increment = LEAVE_INCREMENT); + void output_first_occurrence_distribution(int variable , int output , + int min_nb_value = 1 , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + void output_leave_probability(const double *memory , + int variable , int output , + double increment = LEAVE_INCREMENT); + void output_recurrence_time_distribution(const double *memory , int variable , + int output , int min_nb_value = 1 , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + void output_sojourn_time_distribution(const double *memory , int variable , + int output , int min_nb_value = 1 , + double cumul_threshold = stat_tool::CUMUL_THRESHOLD); + void output_nb_run_mixture(int variable , int output); + void output_nb_occurrence_mixture(int variable , int output); + + Correlation* output_autocorrelation_computation(stat_tool::StatError &error , int variable , + int output , int max_lag , + const VariableOrderMarkovData *seq) const; + + double likelihood_computation(const VariableOrderMarkovChainData &chain_data) const; + + double likelihood_correction(const VariableOrderMarkovData &seq) const; + + std::ostream& transition_count_ascii_write(std::ostream &os , bool begin) const; + + public : + + VariableOrderMarkov(); + VariableOrderMarkov(stat_tool::process_type itype , int inb_state , int inb_row); + VariableOrderMarkov(stat_tool::process_type itype , int inb_state , int inb_row , int imax_order); + VariableOrderMarkov(stat_tool::process_type itype , int inb_state , int iorder , bool init_flag , + int inb_output_process = 0 , int nb_value = 0); + VariableOrderMarkov(const VariableOrderMarkov &markov , + int inb_output_process , int nb_value); +/* VariableOrderMarkov(const VariableOrderMarkov &markov , + int inb_output_process , int *nb_value); */ + VariableOrderMarkov(const VariableOrderMarkovChain *pmarkov , + const 
stat_tool::CategoricalProcess *pobservation , int length); + VariableOrderMarkov(const VariableOrderMarkov &markov , bool data_flag = true) + :VariableOrderMarkovChain(markov) { copy(markov , data_flag); } + void conditional_delete(); + ~VariableOrderMarkov(); + VariableOrderMarkov& operator=(const VariableOrderMarkov &markov); + + DiscreteParametricModel* extract(stat_tool::StatError &error , + stat_tool::process_distribution dist_type , + int variable , int value) const; + VariableOrderMarkovData* extract_data(stat_tool::StatError &error) const; + + VariableOrderMarkov* thresholding(double min_probability = MIN_PROBABILITY) const; + + static VariableOrderMarkov* ascii_read(stat_tool::StatError &error , const std::string path , + int length = DEFAULT_LENGTH); + + std::ostream& line_write(std::ostream &os) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + void characteristic_computation(int length , bool counting_flag , int variable = stat_tool::I_DEFAULT); + void characteristic_computation(const VariableOrderMarkovData &seq , bool counting_flag , + int variable = stat_tool::I_DEFAULT , bool length_flag = true); + + Correlation* state_autocorrelation_computation(stat_tool::StatError &error , int istate , + int max_lag = MAX_LAG) const; + Correlation* output_autocorrelation_computation(stat_tool::StatError &error , int variable , + int output , int max_lag = MAX_LAG) const; + + double likelihood_computation(const MarkovianSequences &seq , int index) const; + double likelihood_computation(const VariableOrderMarkovData &seq) const; + + VariableOrderMarkovData* 
simulation(stat_tool::StatError &error , const FrequencyDistribution &hlength , + bool counting_flag = true , bool divergence_flag = false) const; + VariableOrderMarkovData* simulation(stat_tool::StatError &error , int nb_sequence , + int length , bool counting_flag = true) const; + VariableOrderMarkovData* simulation(stat_tool::StatError &error , int nb_sequence , + const MarkovianSequences &iseq , + bool counting_flag = true) const; + + stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model , + const VariableOrderMarkov **imarkov , + stat_tool::FrequencyDistribution **hlength , + const std::string path = "") const; + stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model , + const VariableOrderMarkov **markov , int nb_sequence , + int length , const std::string path = "") const; + stat_tool::DistanceMatrix* divergence_computation(stat_tool::StatError &error , std::ostream *os , int nb_model , + const VariableOrderMarkov **markov , int nb_sequence , + const MarkovianSequences **seq , const std::string path = "") const; + + // class member access + + int get_nb_iterator() const { return nb_iterator; } + VariableOrderMarkovData* get_markov_data() const { return markov_data; } + CategoricalSequenceProcess* get_state_process() const + { return state_process; } + int get_nb_output_process() const { return nb_output_process; } + CategoricalSequenceProcess** get_categorical_process() const + { return categorical_process; } + CategoricalSequenceProcess* get_categorical_process(int variable) const + { return categorical_process[variable]; } + stat_tool::DiscreteParametricProcess** get_discrete_parametric_process() const + { return discrete_parametric_process; } + stat_tool::DiscreteParametricProcess* get_discrete_parametric_process(int variable) const + { return discrete_parametric_process[variable]; } + stat_tool::ContinuousParametricProcess** 
get_continuous_parametric_process() const + { return continuous_parametric_process; } + stat_tool::ContinuousParametricProcess* get_continuous_parametric_process(int variable) const + { return continuous_parametric_process[variable]; } + }; + + + /// \brief Variable-order Markov chain iterator + + class VariableOrderMarkovIterator { + + private : + + VariableOrderMarkov *markov; ///< pointer on a VariableOrderMarkov object + int memory; ///< memory + + void copy(const VariableOrderMarkovIterator &it); + + public : + + VariableOrderMarkovIterator(VariableOrderMarkov *imarkov); + VariableOrderMarkovIterator(const VariableOrderMarkovIterator &iter) + { copy(iter); } + ~VariableOrderMarkovIterator(); + VariableOrderMarkovIterator& operator=(const VariableOrderMarkovIterator &iter); + + bool simulation(int **int_seq , int ilength = 1 , bool initialization = false); + int** simulation(int ilength = 1 , bool initialization = false); + + // class member access + + VariableOrderMarkov* get_markov() const { return markov; } + int get_memory() const { return memory; } + int get_nb_variable() const { return (markov ? 
markov->nb_output_process + 1 : 0); } + }; + + + + /// \brief Data structure corresponding to a variable-order Markov chain + + class VariableOrderMarkovChainData : public stat_tool::ChainData { + + public : + + VariableOrderMarkovChainData(stat_tool::process_type type , int inb_state , int inb_row , bool init_flag = false) + :ChainData(type , inb_state , inb_row , init_flag) {} + + void estimation(VariableOrderMarkovChain &markov , bool non_terminal = false , + transition_estimator estimator = MAXIMUM_LIKELIHOOD , + double laplace_coeff = LAPLACE_COEFF) const; + }; + + + /// \brief Data structure corresponding to a variable-order Markov chain + + class VariableOrderMarkovData : public MarkovianSequences { + + friend class MarkovianSequences; + friend class VariableOrderMarkov; + friend class HiddenVariableOrderMarkov; + + friend std::ostream& operator<<(std::ostream &os , const VariableOrderMarkovData &seq) + { return seq.ascii_write(os , false); } + + private : + + VariableOrderMarkov *markov; ///< pointer on a VariableOrderMarkov object + VariableOrderMarkovChainData *chain_data; ///< initial states and transition counts + double likelihood; ///< log-likelihood for the observed sequences + double restoration_likelihood; ///< log-likelihood for the restored state sequences + double sample_entropy; ///< entropy of the state sequences for the sample + double *posterior_probability; ///< posterior probabilities of the most probable state sequences + double *entropy; ///< entropies of the state sequences + double *nb_state_sequence; ///< numbers of state sequences + + void copy(const VariableOrderMarkovData &seq , bool model_flag = true); + void observation_frequency_distribution_correction(FrequencyDistribution **corrected_observation , + int variable , int start) const; + + public : + + VariableOrderMarkovData(); + VariableOrderMarkovData(const stat_tool::FrequencyDistribution &ihlength , int inb_variable , + stat_tool::variable_nature *itype , bool init_flag = 
false); + VariableOrderMarkovData(const MarkovianSequences &seq); + VariableOrderMarkovData(const MarkovianSequences &seq , sequence_transformation transform , + bool initial_run_flag); + VariableOrderMarkovData(const VariableOrderMarkovData &seq , bool model_flag = true , + sequence_transformation transform = SEQUENCE_COPY) + :MarkovianSequences(seq , transform) { copy(seq , model_flag); } + ~VariableOrderMarkovData(); + VariableOrderMarkovData& operator=(const VariableOrderMarkovData &seq); + + DiscreteDistributionData* extract(stat_tool::StatError &error , + stat_tool::process_distribution histo_type , + int variable , int value) const; + VariableOrderMarkovData* explicit_index_parameter(stat_tool::StatError &error) const; + VariableOrderMarkovData* remove_index_parameter(stat_tool::StatError &error) const; + MarkovianSequences* build_auxiliary_variable(stat_tool::StatError &error) const; + MarkovianSequences* residual_sequences(stat_tool::StatError &error) const; + + Correlation* state_autocorrelation_computation(stat_tool::StatError &error , int istate , + int max_lag = MAX_LAG) const; + Correlation* output_autocorrelation_computation(stat_tool::StatError &error , int variable , + int output , int max_lag = MAX_LAG) const; + + std::ostream& ascii_data_write(std::ostream &os , output_sequence_format format = COLUMN , + bool exhaustive = false) const; + std::string ascii_data_write(output_sequence_format format = COLUMN , bool exhaustive = false) const; + bool ascii_data_write(stat_tool::StatError &error , const std::string path , + output_sequence_format format = COLUMN , bool exhaustive = false) const; + + std::ostream& ascii_write(std::ostream &os , bool exhaustive = false) const; + bool ascii_write(stat_tool::StatError &error , const std::string path , bool exhaustive = false) const; + bool spreadsheet_write(stat_tool::StatError &error , const std::string path) const; + bool plot_write(stat_tool::StatError &error , const char *prefix , const char *title = 
NULL) const; + stat_tool::MultiPlotSet* get_plotable() const; + + void build_transition_count(const VariableOrderMarkovChain &markov , + bool begin = true , bool non_terminal = false); + void order0_estimation(VariableOrderMarkov &markov) const; + + // class member access + + VariableOrderMarkov* get_markov() const { return markov; } + VariableOrderMarkovChainData* get_chain_data() const { return chain_data; } + double get_likelihood() const { return likelihood; } + double get_restoration_likelihood() const { return restoration_likelihood; } + double get_sample_entropy() const { return sample_entropy; } + double get_posterior_probability(int index) const { return posterior_probability[index]; } + double get_entropy(int index) const { return entropy[index]; } + double get_nb_state_sequence(int index) const { return nb_state_sequence[index]; } + }; + + +}; // namespace sequence_analysis + + + +#endif diff --git a/src/cpp/sequence_analysis/vomc_algorithms.cpp b/src/cpp/sequence_analysis/vomc_algorithms.cpp new file mode 100644 index 0000000..03a3bbc --- /dev/null +++ b/src/cpp/sequence_analysis/vomc_algorithms.cpp @@ -0,0 +1,4964 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2019 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include +#include +#include +#include + +#include + +#include "stat_tool/stat_label.h" + +#include "variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace boost::math; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the transition probabilities corresponding to the non-terminal memories + * for an ordinary variable-order Markov chain. 
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkovChain::non_terminal_transition_probability_computation()
+
+{
+  int i , j , k;
+  int nb_terminal;
+  double sum , *memory , *previous_memory;
+
+
+  // look for the first non-terminal memory and sum its transition row:
+  // the computation below is only run when this row is still all zeros
+
+  for (i = 1;i < nb_row;i++) {
+    if (memo_type[i] == NON_TERMINAL) {
+      sum = 0.;
+      for (j = 0;j < nb_state;j++) {
+        sum += transition[i][j];
+      }
+      break;
+    }
+  }
+
+  // 'sum' is only read when a non-terminal memory was found (i < nb_row)
+
+  if ((i < nb_row) && (sum == 0.)) {
+
+    // initialization of the probabilities of the memories
+    // (only the order-1 memories receive the initial probability of their state)
+
+    memory = new double[nb_row];
+    previous_memory = new double[nb_row];
+
+    for (i = 1;i < nb_row;i++) {
+      if (order[i] == 1) {
+        memory[i] = initial[state[i][0]];
+      }
+      else {
+        memory[i] = 0.;
+      }
+    }
+
+    // computation of the probabilities of each memory as a function of the index parameter
+    // (the non-terminal rows are temporarily set to the uniform distribution)
+
+    for (i = 1;i < nb_row;i++) {
+      if (memo_type[i] == NON_TERMINAL) {
+        for (j = 0;j < nb_state;j++) {
+          transition[i][j] = 1. / (double)nb_state;
+        }
+      }
+    }
+
+    nb_terminal = (nb_row - 1) * (nb_state - 1) / nb_state + 1;
+    i = 1;
+
+    do {
+
+      // update of the probabilities of the memories
+
+      for (j = 1;j < nb_row;j++) {
+        previous_memory[j] = memory[j];
+      }
+
+      // computation of the probabilities of the memories
+
+      for (j = 1;j < nb_row;j++) {
+        memory[j] = 0.;
+        for (k = 0;k < nb_memory[j];k++) {
+          memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]];
+        }
+      }
+
+      // computation of the sum of the absolute differences of the probabilities of the memories
+
+      sum = 0.;
+      for (j = 1;j < nb_row;j++) {
+        sum += fabs(memory[j] - previous_memory[j]);
+      }
+
+      i++;
+    }
+    // at least max_order steps, then until the mean absolute difference falls under
+    // the threshold; the number of steps is bounded by STATIONARY_PROBABILITY_LENGTH
+    while (((i < max_order) || (sum / nb_terminal > STATIONARY_PROBABILITY_THRESHOLD)) &&
+           (i < STATIONARY_PROBABILITY_LENGTH));
+
+#   ifdef DEBUG
+    cout << "LENGTH: " << i << endl;
+#   endif
+
+    // extraction of the transition probabilities corresponding to the non-terminal memories
+    // (rows are visited from the last one to the first one: each memory with children
+    //  cumulates the probabilities of its children, and a non-terminal memory receives
+    //  the average of its children's transition rows weighted by these probabilities)
+
+    for (i = nb_row - 1;i >= 1;i--) {
+      if (child[i]) {
+        memory[i] = 0.;
+
+        if (memo_type[i] == NON_TERMINAL) {
+          for (j = 0;j < nb_state;j++) {
+            transition[i][j] = 0.;
+          }
+        }
+
+        for (j = 0;j < nb_state;j++) {
+          memory[i] += memory[child[i][j]];
+
+          if (memo_type[i] == NON_TERMINAL) {
+            for (k = 0;k < nb_state;k++) {
+              transition[i][k] += transition[child[i][j]][k] * memory[child[i][j]];
+            }
+          }
+        }
+
+        if (memo_type[i] == NON_TERMINAL) {
+          for (j = 0;j < nb_state;j++) {
+            transition[i][j] /= memory[i];
+          }
+        }
+      }
+    }
+
+    delete [] memory;
+    delete [] previous_memory;
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the stationary distribution for an equilibrium variable-order Markov chain.
+ *
+ *  The probabilities of the memories are propagated through the transition
+ *  probabilities until two successive iterates are close enough; the result
+ *  is stored in the initial probabilities.
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkovChain::initial_probability_computation()
+
+{
+  int i , j , k;
+  int nb_terminal;
+  double sum , *memory , *previous_memory;
+
+
+  // initialization of the probabilities of the memories
+  // (memories without children start from the current initial probabilities)
+
+  memory = new double[nb_row];
+  previous_memory = new double[nb_row];
+
+  for (i = 1;i < nb_row;i++) {
+    if (!child[i]) {
+      memory[i] = initial[i];
+    }
+    else {
+      memory[i] = 0.;
+    }
+  }
+
+  // computation of the probabilities of each memory as a function of the index parameter
+
+  nb_terminal = (nb_row - 1) * (nb_state - 1) / nb_state + 1;
+  i = 1;
+
+  do {
+
+    // update of the probabilities of the memories
+
+    for (j = 1;j < nb_row;j++) {
+      previous_memory[j] = memory[j];
+    }
+
+    // computation of the probabilities of the memories
+
+    for (j = 1;j < nb_row;j++) {
+      memory[j] = 0.;
+      for (k = 0;k < nb_memory[j];k++) {
+        memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]];
+      }
+    }
+
+    // computation of the sum of the absolute differences of the probabilities of the memories
+
+    sum = 0.;
+    for (j = 1;j < nb_row;j++) {
+      sum += fabs(memory[j] - previous_memory[j]);
+    }
+
+    i++;
+  }
+  // NOTE(review): unlike in non_terminal_transition_probability_computation(), the bound
+  // on the number of iterations is disabled here, so the loop only stops on convergence
+  while ((i < max_order) || (sum / nb_terminal > STATIONARY_PROBABILITY_THRESHOLD));
+//         && (i < STATIONARY_PROBABILITY_LENGTH));
+
+# ifdef DEBUG
+  cout << "LENGTH: " << i << endl;
+# endif
+
+  initial[0] = 0.;
+  for (i = 1;i < nb_row;i++) {
+    initial[i] = memory[i];
+  }
+
+  delete [] memory;
+  delete [] previous_memory;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the log-likelihood of a variable-order Markov chain for sequences.
+ *
+ *  \param[in] seq   reference on a MarkovianSequences object,
+ *  \param[in] index sequence index (I_DEFAULT: all the sequences).
+ *
+ *  \return          log-likelihood (D_INF if the model is not compatible with the data or
+ *                   if a probability involved in the computation is not strictly positive).
+ */
+/*--------------------------------------------------------------*/
+
+double VariableOrderMarkov::likelihood_computation(const MarkovianSequences &seq , int index) const
+
+{
+  int i , j , k;
+  int nb_value , memory , start , length , *pstate , **pioutput;
+  double likelihood = 0. , proba , **proutput;
+
+
+  // checking of the compatibility of the model with the data
+
+  if (nb_output_process + 1 == seq.nb_variable) {
+    if ((!(seq.marginal_distribution[0])) ||
+        (state_process->nb_value < seq.marginal_distribution[0]->nb_value)) {
+      likelihood = D_INF;
+    }
+
+    for (i = 0;i < nb_output_process;i++) {
+
+      // output process i is observed through variable i + 1: the marginal
+      // distribution tested for existence must be the one dereferenced below
+
+      if (((categorical_process[i]) || (discrete_parametric_process[i])) &&
+          (seq.marginal_distribution[i + 1])) {
+        if (categorical_process[i]) {
+          nb_value = categorical_process[i]->nb_value;
+        }
+        else {
+          nb_value = discrete_parametric_process[i]->nb_value;
+        }
+
+        if (nb_value < seq.marginal_distribution[i + 1]->nb_value) {
+          likelihood = D_INF;
+          break;
+        }
+      }
+    }
+  }
+
+  else {
+    likelihood = D_INF;
+  }
+
+  if (likelihood != D_INF) {
+    if (nb_output_process > 0) {
+      pioutput = new int*[nb_output_process];
+      proutput = new double*[nb_output_process];
+    }
+
+    for (i = 0;i < seq.nb_sequence;i++) {
+      if ((index == I_DEFAULT) || (index == i)) {
+
+        // a sequence whose initial memory cannot be identified has a null probability
+
+        proba = 0.;
+
+        switch (type) {
+
+        case ORDINARY : {
+          pstate = seq.int_sequence[i][0];
+          proba = initial[*pstate];
+          memory = child[0][*pstate];
+          start = 1;
+          break;
+        }
+
+        case EQUILIBRIUM : {
+          if (max_order <= seq.length[i]) {
+
+            // search for the memory without children matching the first max_order states
+            // (read backward from position max_order - 1)
+
+            for (j = 1;j < nb_row;j++) {
+              if (!child[j]) {
+                pstate = seq.int_sequence[i][0] + max_order;
+
+                for (k = 0;k < order[j];k++) {
+                  if (*--pstate != state[j][k]) {
+                    break;
+                  }
+                }
+
+                if (k == order[j]) {
+                  proba = initial[j];
+                  memory = j;
+                  pstate = seq.int_sequence[i][0] + max_order - 1;
+                  start = max_order;
+                  break;
+                }
+              }
+            }
+          }
+
+          else {
+            likelihood = D_INF;
+          }
+          break;
+        }
+        }
+
+        if (likelihood == D_INF) {
+          break;
+        }
+
+        if (proba > 0.) {
+          likelihood += log(proba);
+        }
+        else {
+          likelihood = D_INF;
+          break;
+        }
+
+        // observation probabilities at position start - 1
+
+        for (j = 0;j < nb_output_process;j++) {
+          switch (seq.type[j + 1]) {
+          case INT_VALUE :
+            pioutput[j] = seq.int_sequence[i][j + 1] + start - 1;
+            break;
+          case REAL_VALUE :
+            proutput[j] = seq.real_sequence[i][j + 1] + start - 1;
+            break;
+          }
+
+          if (categorical_process[j]) {
+            proba = categorical_process[j]->observation[*pstate]->mass[*pioutput[j]];
+          }
+
+          else if (discrete_parametric_process[j]) {
+            proba = discrete_parametric_process[j]->observation[*pstate]->mass[*pioutput[j]];
+          }
+
+          else {
+            switch (seq.type[j + 1]) {
+            case INT_VALUE :
+              proba = continuous_parametric_process[j]->observation[*pstate]->mass_computation(*pioutput[j] - seq.min_interval[j + 1] / 2 , *pioutput[j] + seq.min_interval[j + 1] / 2);
+              break;
+            case REAL_VALUE :
+              proba = continuous_parametric_process[j]->observation[*pstate]->mass_computation(*proutput[j] - seq.min_interval[j + 1] / 2 , *proutput[j] + seq.min_interval[j + 1] / 2);
+              break;
+            }
+          }
+
+          if (proba > 0.) {
+            likelihood += log(proba);
+          }
+          else {
+            likelihood = D_INF;
+            break;
+          }
+        }
+
+        // the break above only leaves the loop on the output processes: stop here,
+        // the output cursors of the remaining processes have not been positioned
+
+        if (likelihood == D_INF) {
+          break;
+        }
+
+        // transition and observation probabilities for the following positions
+
+        for (j = start;j < seq.length[i];j++) {
+          proba = transition[memory][*++pstate];
+          if (proba > 0.) {
+            likelihood += log(proba);
+          }
+          else {
+            likelihood = D_INF;
+            break;
+          }
+
+          for (k = 0;k < nb_output_process;k++) {
+            if (categorical_process[k]) {
+              proba = categorical_process[k]->observation[*pstate]->mass[*++pioutput[k]];
+            }
+
+            else if (discrete_parametric_process[k]) {
+              proba = discrete_parametric_process[k]->observation[*pstate]->mass[*++pioutput[k]];
+            }
+
+            else {
+              switch (seq.type[k + 1]) {
+              case INT_VALUE :
+                pioutput[k]++;
+                proba = continuous_parametric_process[k]->observation[*pstate]->mass_computation(*pioutput[k] - seq.min_interval[k + 1] / 2 , *pioutput[k] + seq.min_interval[k + 1] / 2);
+                break;
+              case REAL_VALUE :
+                proutput[k]++;
+                proba = continuous_parametric_process[k]->observation[*pstate]->mass_computation(*proutput[k] - seq.min_interval[k + 1] / 2 , *proutput[k] + seq.min_interval[k + 1] / 2);
+                break;
+              }
+            }
+
+            if (proba > 0.) {
+              likelihood += log(proba);
+            }
+            else {
+              likelihood = D_INF;
+              break;
+            }
+          }
+
+          if (likelihood == D_INF) {
+            break;
+          }
+
+          memory = next[memory][*pstate];
+        }
+
+        if (likelihood == D_INF) {
+          break;
+        }
+      }
+    }
+
+    // equilibrium process: rescaling of the log-likelihood to compensate
+    // for the positions skipped at the beginning of the sequences
+
+    if ((likelihood != D_INF) && (type == EQUILIBRIUM)) {
+      length = 0;
+      for (i = 0;i < seq.nb_sequence;i++) {
+        if ((index == I_DEFAULT) || (index == i)) {
+          length += MIN(max_order - 1 , seq.length[i]);
+        }
+      }
+
+      // the loop counter is equal to seq.nb_sequence at this point and cannot be used
+      // to detect the single-sequence case: the requested index is tested directly
+
+      if ((index != I_DEFAULT) && (index >= 0) && (index < seq.nb_sequence)) {
+        likelihood = likelihood * seq.length[index] / (seq.length[index] - length);
+      }
+      else {
+        likelihood = likelihood * seq.cumul_length / (seq.cumul_length - length);
+      }
+    }
+
+    if (nb_output_process > 0) {
+      delete [] pioutput;
+      delete [] proutput;
+    }
+  }
+
+  return likelihood;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of the log-likelihood of initial states and transitions
+ *         for a variable-order Markov chain.
+ *
+ *  \param[in] chain_data reference on a VariableOrderMarkovChainData object.
+ *
+ *  \return               log-likelihood.
+ */
+/*--------------------------------------------------------------*/
+
+double VariableOrderMarkov::likelihood_computation(const VariableOrderMarkovChainData &chain_data) const
+
+{
+  // the counts must be organized like the model (same states, same memories)
+
+  if ((chain_data.nb_state != nb_state) || (chain_data.nb_row != nb_row)) {
+    return D_INF;
+  }
+
+  const int nb_initial = (type == ORDINARY ? nb_state : nb_row);
+  double log_likelihood = 0.;
+
+  // contribution of the initial counts: an observed initial state/memory
+  // whose probability is not strictly positive makes the data impossible
+
+  for (int row = 0;row < nb_initial;row++) {
+    if (chain_data.initial[row] > 0) {
+      if (!(initial[row] > 0.)) {
+        return D_INF;
+      }
+      log_likelihood += chain_data.initial[row] * log(initial[row]);
+    }
+  }
+
+  if (log_likelihood == D_INF) {
+    return log_likelihood;
+  }
+
+  // contribution of the transition counts: terminal memories and,
+  // for an ordinary process only, non-terminal memories
+
+  for (int row = 1;row < nb_row;row++) {
+    const bool counted = (memo_type[row] == TERMINAL) ||
+                         ((type == ORDINARY) && (memo_type[row] == NON_TERMINAL));
+    if (!counted) {
+      continue;
+    }
+
+    for (int col = 0;col < nb_state;col++) {
+      if (chain_data.transition[row][col] > 0) {
+        if (!(transition[row][col] > 0.)) {
+          return D_INF;
+        }
+        log_likelihood += chain_data.transition[row][col] * log(transition[row][col]);
+      }
+    }
+
+    if (log_likelihood == D_INF) {
+      break;
+    }
+  }
+
+  return log_likelihood;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Correction of the observation frequency distributions.
+ *
+ *  \param[in] corrected_observation reference on FrequencyDistribution objects,
+ *  \param[in] variable              variable index,
+ *  \param[in] start                 start.
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovData::observation_frequency_distribution_correction(FrequencyDistribution **corrected_observation , + int variable , int start) const + +{ + int i , j; + int *pstate , *poutput; + + + for (i = 0;i < nb_sequence;i++) { + pstate = int_sequence[i][0]; + poutput = int_sequence[i][variable]; + for (j = 0;j < MIN(start , length[i]);j++) { + (corrected_observation[*pstate++]->frequency[*poutput++])--; + } + } + + // computation of the characteristics of the observation frequency distributions + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + corrected_observation[i]->nb_value_computation(); + corrected_observation[i]->offset_computation(); + corrected_observation[i]->nb_element_computation(); + corrected_observation[i]->max_computation(); + + if (!characteristics[variable]) { + corrected_observation[i]->mean_computation(); + corrected_observation[i]->variance_computation(); + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the log-likelihood of a variable-order Markov chain for sequences. + * + * \param[in] seq reference on a VariableOrderMarkovData object. + * + * \return log-likelihood. 
+ */ +/*--------------------------------------------------------------*/ + +double VariableOrderMarkov::likelihood_computation(const VariableOrderMarkovData &seq) const + +{ + int i , j; + int nb_value , length; + double buff , likelihood = 0.; + FrequencyDistribution **observation; + + + // checking of the compatibility of the model with the data + + if (nb_output_process + 1 == seq.nb_variable) { + if ((!(seq.marginal_distribution[0])) || (nb_state < seq.marginal_distribution[0]->nb_value)) { + likelihood = D_INF; + } + + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) || (discrete_parametric_process[i])) { + if (categorical_process[i]) { + nb_value = categorical_process[i]->nb_value; + } + else { + nb_value = discrete_parametric_process[i]->nb_value; + } + + if (nb_value < seq.marginal_distribution[i + 1]->nb_value) { + likelihood = D_INF; + break; + } + } + + else if (!(seq.marginal_distribution[i + 1])) { + likelihood = D_INF; + break; + } + } + } + + else { + likelihood = D_INF; + } + + if (likelihood != D_INF) { + likelihood = likelihood_computation(*(seq.chain_data)); + + if ((likelihood != D_INF) && (nb_output_process > 0)) { + observation = new FrequencyDistribution*[nb_state]; + + for (i = 0;i < nb_output_process;i++) { + switch (type) { + + case ORDINARY : { + for (j = 0;j < nb_state;j++) { + observation[j] = seq.observation_distribution[i + 1][j]; + } + break; + } + + case EQUILIBRIUM : { + for (j = 0;j < nb_state;j++) { + observation[j] = new FrequencyDistribution(*(seq.observation_distribution[i + 1][j])); + } + break; + } + } + + if (type == EQUILIBRIUM) { + seq.observation_frequency_distribution_correction(observation , i , max_order - 1); + } + + if (categorical_process[i]) { + for (j = 0;j < nb_state;j++) { + buff = categorical_process[i]->observation[j]->likelihood_computation(*observation[j]); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + } + } + + else if 
(discrete_parametric_process[i]) { + for (j = 0;j < nb_state;j++) { + buff = discrete_parametric_process[i]->observation[j]->likelihood_computation(*observation[j]); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + } + } + + else { + for (j = 0;j < nb_state;j++) { + buff = continuous_parametric_process[i]->observation[j]->likelihood_computation(*observation[j] , (int)seq.min_interval[i]); + + if (buff != D_INF) { + likelihood += buff; + } + else { + likelihood = D_INF; + break; + } + } + } + + if (type == EQUILIBRIUM) { + for (j = 0;j < nb_state;j++) { + delete observation[j]; + } + } + + if (likelihood == D_INF) { + break; + } + } + + delete [] observation; + } + + if ((likelihood != D_INF) && (type == EQUILIBRIUM)) { + length = 0; + for (i = 0;i < seq.nb_sequence;i++) { + length += MIN(max_order - 1 , seq.length[i]); + } + + likelihood = likelihood * seq.cumul_length / (seq.cumul_length - length); + } + } + + return likelihood; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Counting of initial states and transitions. + * + * \param[in] chain_data reference on a VariableOrderMarkovChainData object, + * \param[in] markov reference on a VariableOrderMarkovChain object, + * \param[in] begin flag for taking account of the beginning of sequences, + * \param[in] non_terminal flag for cumulating on the non-terminal memories. + */ +/*--------------------------------------------------------------*/ + +void MarkovianSequences::transition_count_computation(const VariableOrderMarkovChainData &chain_data , + const VariableOrderMarkovChain &markov , + bool begin , bool non_terminal) const + +{ + int i , j , k; + int memory , start , *pstate; + + + for (i = 0;i < (markov.type == ORDINARY ? 
chain_data.nb_state : chain_data.nb_row);i++) { + chain_data.initial[i] = 0; + } + + for (i = 0;i < chain_data.nb_row;i++) { + for (j = 0;j < chain_data.nb_state;j++) { + chain_data.transition[i][j] = 0; + } + } + + // extraction of initial states and transitions + + for (i = 0;i < nb_sequence;i++) { + switch (markov.type) { + + case ORDINARY : { + pstate = int_sequence[i][0]; + (chain_data.initial[*pstate])++; + memory = markov.child[0][*pstate]; + start = 1; + break; + } + + case EQUILIBRIUM : { + if (markov.max_order <= length[i]) { + for (j = 1;j < chain_data.nb_row;j++) { + if (!markov.child[j]) { + pstate = int_sequence[i][0] + markov.max_order; + + for (k = 0;k < markov.order[j];k++) { + if (*--pstate != markov.state[j][k]) { + break; + } + } + + if (k == markov.order[j]) { + (chain_data.initial[j])++; + memory = j; + pstate = int_sequence[i][0] + markov.max_order - 1; + start = markov.max_order; + break; + } + } + } + } + break; + } + } + + for (j = start;j < length[i];j++) { + pstate++; + if ((begin) || (!markov.child[memory])) { + (chain_data.transition[memory][*pstate])++; + } + memory = markov.next[memory][*pstate]; + } + } + + // extraction of the transition counts corresponding to non-terminal memories + + for (i = chain_data.nb_row - 1;i >= 1;i--) { + if ((markov.memo_type[i] == COMPLETION) || (non_terminal)) { + for (j = 0;j < chain_data.nb_state;j++) { + chain_data.transition[markov.parent[i]][j] += chain_data.transition[i][j]; + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Construction of initial state and transition counts. + * + * \param[in] markov reference on a VariableOrderMarkovChain object, + * \param[in] begin flag for taking account of the beginning of sequences, + * \param[in] non_terminal flag for cumulating on the non-terminal memories. 
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkovData::build_transition_count(const VariableOrderMarkovChain &markov ,
+                                                     bool begin , bool non_terminal)
+
+{
+  // allocation of counts with the dimensions of the model, then counting on the sequences
+
+  chain_data = new VariableOrderMarkovChainData(markov.type , markov.nb_state , markov.nb_row);
+  transition_count_computation(*chain_data , markov , begin , non_terminal);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Estimation of a variable-order Markov chain from the initial state and transition counts.
+ *
+ *  \param[in] markov        reference on a VariableOrderMarkovChain object,
+ *  \param[in] non_terminal  flag for estimating the transition probabilities on the non-terminal memories,
+ *  \param[in] estimator     estimator (maximum likelihood, Laplace, adaptative Laplace,
+ *                           uniform subset, uniform cardinality),
+ *  \param[in] laplace_coeff Laplace estimator coefficient.
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkovChainData::estimation(VariableOrderMarkovChain &markov , bool non_terminal ,
+                                              transition_estimator estimator , double laplace_coeff) const
+
+{
+  int i , j;
+  int sum , nb_parameter;
+
+
+  // estimation of the initial probabilities (ordinary process only)
+
+  if (markov.type == ORDINARY) {
+    sum = 0;
+    for (i = 0;i < nb_state;i++) {
+      sum += initial[i];
+    }
+
+    for (i = 0;i < nb_state;i++) {
+      markov.initial[i] = (double)initial[i] / (double)sum;
+    }
+  }
+
+  // estimation of the transition probabilities
+
+  for (i = 0;i < nb_row;i++) {
+    if ((markov.memo_type[i] == TERMINAL) || ((markov.memo_type[i] == NON_TERMINAL) &&
+         ((markov.type == ORDINARY) || (non_terminal)))) {
+
+      // number of transitions counted from the memory
+
+      sum = 0;
+      for (j = 0;j < nb_state;j++) {
+        sum += transition[i][j];
+      }
+
+      // number of distinct states observed after the memory
+      // (only required by the estimators below that use it)
+
+      if ((estimator == ADAPTATIVE_LAPLACE) || (estimator == UNIFORM_SUBSET) ||
+          (estimator == UNIFORM_CARDINALITY)) {
+        nb_parameter = 0;
+        for (j = 0;j < nb_state;j++) {
+          if (transition[i][j] > 0) {
+            nb_parameter++;
+          }
+        }
+      }
+
+      switch (estimator) {
+
+      case MAXIMUM_LIKELIHOOD : {
+        if (sum > 0) {
+          for (j = 0;j < nb_state;j++) {
+            markov.transition[i][j] = (double)transition[i][j] / (double)sum;
+          }
+        }
+
+        // unobserved memory (except row 0): degenerate distribution on state[i][0]
+
+        else if (i > 0) {
+          for (j = 0;j < markov.state[i][0];j++) {
+            markov.transition[i][j] = 0.;
+          }
+          markov.transition[i][markov.state[i][0]] = 1.;
+          for (j = markov.state[i][0] + 1;j < nb_state;j++) {
+            markov.transition[i][j] = 0.;
+          }
+        }
+        break;
+      }
+
+      // Laplace estimator
+
+      case LAPLACE : {
+        for (j = 0;j < nb_state;j++) {
+          markov.transition[i][j] = (double)(transition[i][j] + laplace_coeff) /
+                                    (double)(sum + nb_state * laplace_coeff);
+        }
+        break;
+      }
+
+      // adaptative Laplace estimator (Vert, 2001)
+
+      case ADAPTATIVE_LAPLACE : {
+        if (sum > 0) {
+          for (j = 0;j < nb_state;j++) {
+            markov.transition[i][j] = ((double)transition[i][j] + (double)nb_parameter / (double)nb_state) /
+                                      (double)(sum + nb_parameter);
+          }
+        }
+
+        else {
+          for (j = 0;j < nb_state;j++) {
+            markov.transition[i][j] = 1 / (double)nb_state;
+          }
+        }
+        break;
+      }
+
+      // Ristad (1995) estimator 1
+
+      case UNIFORM_SUBSET : {
+        for (j = 0;j < nb_state;j++) {
+          if (transition[i][j] > 0) {
+/*            markov.transition[i][j] = ((double)(transition[i][j] + 1) * (double)(sum + 1 - nb_parameter)) /
+                                      ((double)(sum + nb_parameter) * (double)(sum + 1 - nb_parameter) +
+                                       nb_parameter * (nb_state - nb_parameter)); */
+            markov.transition[i][j] = ((double)transition[i][j] * (double)(sum + nb_parameter) *
+                                       (double)(sum + 1 - nb_parameter)) / ((double)sum * ((double)(sum + nb_parameter) *
+                                        (double)(sum + 1 - nb_parameter) + nb_parameter * (nb_state - nb_parameter)));
+          }
+
+          else {
+            markov.transition[i][j] = ((double)nb_parameter) / ((double)(sum + nb_parameter) *
+                                       (double)(sum + 1 - nb_parameter) + nb_parameter * (nb_state - nb_parameter));
+          }
+        }
+        break;
+      }
+
+      // Ristad (1995) estimator 2
+
+      case UNIFORM_CARDINALITY : {
+        if (nb_parameter == nb_state) {
+          for (j = 0;j < nb_state;j++) {
+//            markov.transition[i][j] = (double)(transition[i][j] + 1) / (double)(sum + nb_state);
+            markov.transition[i][j] = (double)transition[i][j] / (double)sum;
+          }
+        }
+
+        else {
+
+          // sum^2 + sum + 2 * nb_parameter, evaluated in double precision:
+          // the product sum * sum overflows the int type for sum > 46340
+
+          const double denom = (double)sum * (double)sum + (double)sum + 2. * (double)nb_parameter;
+
+          for (j = 0;j < nb_state;j++) {
+            if (transition[i][j] > 0) {
+/*              markov.transition[i][j] = ((double)(transition[i][j] + 1) * (double)(sum + 1 - nb_parameter)) /
+                                        ((double)(sum * sum + sum + 2 * nb_parameter)); */
+              markov.transition[i][j] = ((double)transition[i][j] * ((double)sum * (double)(sum + 1) +
+                                          nb_parameter * (1 - nb_parameter))) /
+                                        ((double)sum * denom);
+            }
+            else {
+              markov.transition[i][j] = (double)(nb_parameter * (nb_parameter + 1)) /
+                                        ((nb_state - nb_parameter) * denom);
+            }
+          }
+        }
+        break;
+      }
+      }
+    }
+
+    // completion memory: copy of the transition probabilities of the parent memory
+
+    else if (markov.memo_type[i] == COMPLETION) {
+      for (j = 0;j < nb_state;j++) {
+        markov.transition[i][j] = markov.transition[markov.parent[i]][j];
+      }
+    }
+
+    // other memories: transition probabilities set to 0
+
+    else {
+      for (j = 0;j < nb_state;j++) {
+        markov.transition[i][j] = 0.;
+      }
+    }
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Estimation of a zero-order Markov chain.
+ *
+ *  \param[in] markov reference on a VariableOrderMarkov object.
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovData::order0_estimation(VariableOrderMarkov &markov) const + +{ + int i , j; +// int sum; + + +/* sum = 0; + for (i = 0;i < chain_data->nb_state;i++) { + sum += chain_data->initial[i] + chain_data->transition[0][i]; + } */ + + for (i = 0;i < chain_data->nb_state;i++) { + markov.initial[i] = (double)marginal_distribution[0]->frequency[i] / + (double)marginal_distribution[0]->nb_element; +// markov.initial[i] = (double)(chain_data->initial[i] + chain_data->transition[0][i]) / +// (double)sum; + for (j = 0;j <= chain_data->nb_state;j++) { + markov.transition[j][i] = markov.initial[i]; + } + } + +# ifdef DEBUG + cout << "\n"; + for (i = 0;i < chain_data->nb_state;i++) { + cout << STAT_label[STATL_STATE] << " " << i << ": " << markov.initial[i] << " | " + << (double)(chain_data->initial[i] + chain_data->transition[0][i]) / (double)cumul_length << endl; + } +# endif + +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a variable-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying estimation intermediate results, + * \param[in] itype process type (ORDINARY/EQUILIBRIUM), + * \param[in] min_order minimum order of the variable-order Markov chain, + * \param[in] max_order maximum order of the variable-order Markov chain, + * \param[in] algorithm algorithm (CTM_BIC/CTM_KT/LOCAL_BIC/CONTEXT), + * \param[in] threshold threshold on the memory pruning, + * \param[in] estimator estimator (maximum likelihood, Laplace, adaptative Laplace), + * \param[in] global_initial_transition type of estimation of the initial transition probabilities (ordinary process case), + * \param[in] global_sample type of management of the sample size + * (for the LOCAL_BIC or CONTEXT algorithm), + * \param[in] counting_flag flag on the computation of the counting distributions. 
+ * + * \return VariableOrderMarkov object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov* MarkovianSequences::variable_order_markov_estimation(StatError &error , ostream *os , + process_type itype , int min_order , int max_order , + memory_tree_selection algorithm , double threshold , + transition_estimator estimator , bool global_initial_transition , + bool global_sample , bool counting_flag) const + +{ + bool status = true , order0 , *active_memory , *selected_memory; + int i , j , k; + int sample_size , length_nb_sequence , nb_row , state , nb_terminal , *memory_count , + *nb_parameter , *diff_nb_parameter; + double num , denom , max_likelihood , *memory_likelihood , *diff_likelihood; + VariableOrderMarkov *markov , *completed_markov; + VariableOrderMarkovChainData *chain_data; + VariableOrderMarkovData *seq; + + + completed_markov = NULL; + error.init(); + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if ((marginal_distribution[0]->nb_value < 2) || + (marginal_distribution[0]->nb_value > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + + else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + + if ((min_order < 0) || (min_order >= max_order)) { + status = false; + error.update(SEQ_error[SEQR_MIN_ORDER]); + } + if ((max_order <= min_order) || (max_order > ORDER)) { + status = false; + error.update(SEQ_error[SEQR_MAX_ORDER]); + } + else { + if 
((int)pow((double)marginal_distribution[0]->nb_value , max_order + 1) > NB_PARAMETER) { + status = false; + error.update(SEQ_error[SEQR_NB_PARAMETER]); + } + } + } + + if (nb_variable > 1) { + if (nb_variable > 2) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , "1 or 2"); + } + + if ((type[1] != INT_VALUE) && (type[1] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (test_hidden(1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << SEQ_error[SEQR_OVERLAP]; + error.update((error_message.str()).c_str()); + } + + if (!characteristics[1]) { + for (i = 0;i < marginal_distribution[1]->nb_value;i++) { + if (marginal_distribution[1]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + } + } + + if (status) { + if (os) { + length_nb_sequence = nb_sequence; + + sample_size = cumul_length; + *os << "\n" << STAT_label[STATL_SAMPLE_SIZE] << ":"; + for (i = 0;i <= MIN((int)::round(log((double)cumul_length) / log((double)marginal_distribution[0]->nb_value)) , max_length - 2);i++) { + *os << " " << sample_size; + sample_size -= length_nb_sequence; + length_nb_sequence -= length_distribution->frequency[i + 1]; + } + *os << endl; + + *os << SEQ_label[SEQL_RECOMMENDED_MAX_ORDER] << ": " + << MIN((int)::round(log((double)cumul_length) / log((double)marginal_distribution[0]->nb_value)) , max_length - 2) << endl; + +/* if ((algorithm == CONTEXT) && 
(threshold == CONTEXT_THRESHOLD)) { + Test test(CHI2); + test.df1 = marginal_distribution[0]->nb_value - 1; + test.critical_probability = 0.05; + test.chi2_value_computation(); + + threshold = test.value; + + *os << "\n" << SEQ_label[SEQL_PRUNING_THRESHOLD] << ": " << threshold << endl; + } */ + } + + markov = new VariableOrderMarkov(itype , marginal_distribution[0]->nb_value , max_order , true); + + // counting of transitions and computation of the corresponding number of free parameters + + chain_data = new VariableOrderMarkovChainData(markov->type , markov->nb_state , markov->nb_row); + transition_count_computation(*chain_data , *markov , false , true); + chain_data->estimation(*markov , true , estimator); + + memory_count = new int[markov->nb_row]; + memory_likelihood = new double[markov->nb_row]; + nb_parameter = new int[markov->nb_row]; + diff_likelihood = new double[markov->nb_row]; + diff_nb_parameter = new int[markov->nb_row]; + + for (i = 0;i < markov->nb_row;i++) { + memory_count[i] = 0; + for (j = 0;j < markov->nb_state;j++) { + memory_count[i] += chain_data->transition[i][j]; + } + } + + // BIC-type estimator + + if (algorithm != CTM_KT) { + for (i = 0;i < markov->nb_row;i++) { + memory_likelihood[i] = 0.; + if (memory_count[i] > 0) { + for (j = 0;j < markov->nb_state;j++) { + if (chain_data->transition[i][j] > 0) { + memory_likelihood[i] += chain_data->transition[i][j] * log(markov->transition[i][j]); + } + } + } + } + } + + // Krichevsky-Trofimov estimator + + else { + for (i = 0;i < markov->nb_row;i++) { + memory_likelihood[i] = 0.; + if (memory_count[i] > 0) { + denom = memory_count[i] - 1. 
+ (double)markov->nb_state / 2.; + for (j = 0;j < markov->nb_state;j++) { + if (chain_data->transition[i][j] > 0) { + num = chain_data->transition[i][j] - 0.5; + for (k = 0;k < chain_data->transition[i][j];k++) { + memory_likelihood[i] += log(num) - log(denom); + num--; + denom--; + } + } + } + } + } + } + + for (i = 0;i < markov->nb_row;i++) { + nb_parameter[i] = -1; + for (j = 0;j < markov->nb_state;j++) { + if (chain_data->transition[i][j] > 0) { +// if (markov->transition[i][j] > 0.) { + nb_parameter[i]++; + } + } + + if (nb_parameter[i] == -1) { + nb_parameter[i] = 0; + } + else if ((algorithm == CTM_BIC) || (algorithm == LOCAL_BIC)) { + memory_likelihood[i] = 2 * memory_likelihood[i] - + nb_parameter[i] * log((double)memory_count[0]); + } + } + +# ifdef DEBUG + for (i = 0;i < markov->nb_row;i++) { + for (j = markov->max_order - 1;j >= markov->order[i];j--) { + cout << " "; + } + for (j = markov->order[i] - 1;j >= 0;j--) { + cout << markov->state[i][j] << " "; + } + cout << " "; + for (j = 0;j < markov->nb_state;j++) { + cout << chain_data->transition[i][j] << " "; + } + cout << " | "; + for (j = 0;j < markov->nb_state;j++) { + cout << markov->transition[i][j] << " "; + } + cout << " " << memory_likelihood[i] << " " << nb_parameter[i] << endl; + } +# endif + + // pruning of the memory tree + + if (os) { + if ((algorithm == CONTEXT) && (global_sample)) { + *os << "\n" << SEQ_label[SEQL_PRUNING_THRESHOLD] << ": " + << threshold * log((double)memory_count[0]) << endl; + } + + *os << "\n"; + } + + if ((algorithm == CTM_BIC) || (algorithm == CTM_KT)) { + active_memory = new bool[markov->nb_row]; + for (i = 0;i < markov->nb_row;i++) { + active_memory[i] = false; + } + + selected_memory = new bool[markov->nb_row]; + selected_memory[0] = true; + for (i = 1;i < markov->nb_row;i++) { + if (markov->order[i] <= min_order) { + selected_memory[i] = true; + } + else { + selected_memory[i] = false; + } + } + + // computation of the maximum BIC or Krichevsky-Trofimov estimator 
for each memory sub-tree + + for (i = markov->nb_row - 1;i >= 0;i--) { + if ((markov->memo_type[i] == NON_TERMINAL) && (nb_parameter[i] > 0) && + (memory_count[i] >= MEMORY_MIN_COUNT)) { + max_likelihood = 0.; + diff_nb_parameter[i] = -nb_parameter[i]; + for (j = 0;j < markov->nb_state;j++) { + max_likelihood += memory_likelihood[markov->child[i][j]]; + diff_nb_parameter[i] += nb_parameter[markov->child[i][j]]; + } + + diff_likelihood[i] = max_likelihood - memory_likelihood[i]; + + if (max_likelihood > memory_likelihood[i]) { + memory_likelihood[i] = max_likelihood; + active_memory[i] = true; + } + } + + else if (os) { + diff_likelihood[i] = 0.; + diff_nb_parameter[i] = 0; + } + } + + // construction by chaining of the memory tree + + nb_row = 1; + for (i = 0;i < markov->nb_row;i++) { + if ((markov->order[i] >= min_order) && (active_memory[i]) && (selected_memory[i]) && + (diff_nb_parameter[i] >= 0) && (diff_likelihood[i] >= threshold)) { +// (diff_likelihood[i] >= threshold)) { + markov->memo_type[i] = NON_TERMINAL; + for (j = 0;j < markov->nb_state;j++) { + selected_memory[markov->child[i][j]] = true; + markov->memo_type[markov->child[i][j]] = TERMINAL; + } + nb_row += markov->nb_state; + } + } + + if (nb_row == 1) { + order0 = true; + for (i = 0;i < markov->nb_state;i++) { + selected_memory[markov->child[0][i]] = true; + markov->memo_type[markov->child[0][i]] = TERMINAL; + } + nb_row += markov->nb_state; + } + + else { + order0 = false; + } + + if (os) { + for (i = 0;i < markov->nb_row;i++) { + if ((nb_parameter[i] > 0) && (memory_count[i] >= MEMORY_MIN_COUNT)) { + for (j = markov->max_order - 1;j >= markov->order[i];j--) { + *os << " "; + } + for (j = markov->order[i] - 1;j >= 0;j--) { + *os << markov->state[i][j] << " "; + } + + *os << " " << diff_likelihood[i] << " " << diff_nb_parameter[i] << " | " << memory_count[i] + << " | " << active_memory[i] << " " << selected_memory[i] << endl; + } + } + } + + for (i = markov->nb_row - 1;i >= 0;i--) { + if 
(selected_memory[i]) { + if ((markov->memo_type[i] == TERMINAL) && (markov->child[i])) { + delete [] markov->child[i]; + markov->child[i] = NULL; + } + } + + else { + markov->memo_type[i] = PRUNED; + delete [] markov->state[i]; + markov->state[i] = NULL; + if (markov->child[i]) { + delete [] markov->child[i]; + markov->child[i] = NULL; + } + } + } + } + + else { + order0 = false; + nb_row = markov->nb_row; + + for (i = markov->nb_row - 1;i >= 0;i--) { + if ((markov->memo_type[i] == NON_TERMINAL) && (markov->order[i] >= min_order)) { + for (j = 0;j < markov->nb_state;j++) { + if (markov->memo_type[markov->child[i][j]] != TERMINAL) { + break; + } + } + + if (j == markov->nb_state) { + if ((nb_parameter[i] > 0) && (memory_count[i] >= MEMORY_MIN_COUNT)) { +/* if (algorithm == LOCAL_BIC) { + if ((nb_parameter[i] > 0) && (memory_count[i] >= MEMORY_MIN_COUNT)) { + diff_likelihood[i] = -memory_likelihood[i]; + diff_nb_parameter[i] = -nb_parameter[i]; + for (j = 0;j < markov->nb_state;j++) { + diff_likelihood[i] += memory_likelihood[markov->child[i][j]]; + diff_nb_parameter[i] += nb_parameter[markov->child[i][j]]; + } + } + } */ + + switch (algorithm) { + case LOCAL_BIC : + diff_likelihood[i] = 0.; + diff_nb_parameter[i] = -nb_parameter[i]; + break; + case CONTEXT : + max_likelihood = D_INF; + break; + } + + for (j = 0;j < markov->nb_state;j++) { + if (algorithm == CONTEXT) { + diff_likelihood[i] = 0.; + } + for (k = 0;k < markov->nb_state;k++) { + if (chain_data->transition[markov->child[i][j]][k] > 0) { + diff_likelihood[i] += chain_data->transition[markov->child[i][j]][k] * + log(markov->transition[markov->child[i][j]][k] / + markov->transition[i][k]); + } + } + + if ((algorithm == CONTEXT) && (diff_likelihood[i] > max_likelihood)) { + max_likelihood = diff_likelihood[i]; + + if (os) { + state = j; + } + } + + if (algorithm == LOCAL_BIC) { + diff_nb_parameter[i] += nb_parameter[markov->child[i][j]]; + } + } + + if (algorithm == LOCAL_BIC) { + diff_likelihood[i] = 2 * 
diff_likelihood[i] - diff_nb_parameter[i] * + log((double)memory_count[global_sample ? 0 : i]); + } + +// if ((os) && (diff_likelihood >= threshold)) { + if (os) { + for (j = markov->max_order - 1;j >= markov->order[i];j--) { + *os << " "; + } + for (j = markov->order[i] - 1;j >= 0;j--) { + *os << markov->state[i][j] << " "; + } + + switch (algorithm) { + + case LOCAL_BIC : { + *os << " " << diff_likelihood[i] << " " << diff_nb_parameter[i] + << " " << memory_count[i] << endl; + break; + } + + case CONTEXT : { + *os << " " << 2 * max_likelihood << " " << state; + if (!global_sample) { + *os << " " << threshold * log((double)memory_count[i]); + } + *os << endl; + break; + } + } + } + } + + if ((nb_parameter[i] == 0) || (memory_count[i] < MEMORY_MIN_COUNT) || + ((algorithm == LOCAL_BIC) && ((diff_nb_parameter[i] < 0) || (diff_likelihood[i] < threshold))) || + ((algorithm == CONTEXT) && + (2 * max_likelihood < threshold * log((double)memory_count[global_sample ? 0 : i])))) { +// (2 * max_likelihood < threshold))) { + if (i > 0) { + markov->memo_type[i] = TERMINAL; + + for (j = 0;j < markov->nb_state;j++) { + markov->memo_type[markov->child[i][j]] = PRUNED; + delete [] markov->state[markov->child[i][j]]; + markov->state[markov->child[i][j]] = NULL; + } + delete [] markov->child[i]; + markov->child[i] = NULL; + + nb_row -= markov->nb_state; + } + + else { + order0 = true; + } + } + } + } + } + } + + // copy of the conserved memories + + i = 1; + for (j = 1;j < markov->nb_row;j++) { + if (markov->memo_type[j] != PRUNED) { + if (i != j) { + for (k = 0;k < markov->nb_state;k++) { + markov->transition[i][k] = markov->transition[j][k]; + } + + markov->memo_type[i] = markov->memo_type[j]; + markov->order[i] = markov->order[j]; + + delete [] markov->state[i]; + markov->state[i] = new int[markov->order[i]]; + for (k = 0;k < markov->order[i];k++) { + markov->state[i][k] = markov->state[j][k]; + } + + for (k = i - 1;k >= 0;k--) { + if ((markov->child[k]) && + 
(markov->child[k][markov->state[i][markov->order[i] - 1]] == j)) { + markov->child[k][markov->state[i][markov->order[i] - 1]] = i; + markov->parent[i] = k; + break; + } + } + + if (markov->child[j]) { + if (!markov->child[i]) { + markov->child[i] = new int[markov->nb_state]; + } + for (k = 0;k < markov->nb_state;k++) { + markov->child[i][k] = markov->child[j][k]; + } + } + + else if (markov->child[i]) { + delete [] markov->child[i]; + markov->child[i] = NULL; + } + } + + i++; + } + } + + for (i = 1;i < markov->nb_row;i++) { + delete [] markov->next[i]; + } + delete [] markov->next; + markov->next = NULL; + + markov->nb_row = nb_row; + markov->max_order_computation(); + + delete chain_data; + + delete [] memory_count; + delete [] memory_likelihood; + delete [] nb_parameter; + delete [] diff_likelihood; + delete [] diff_nb_parameter; + + if ((algorithm == CTM_BIC) || (algorithm == CTM_KT)) { + delete [] active_memory; + delete [] selected_memory; + } + + completed_markov = new VariableOrderMarkov(*markov , nb_variable - 1 , + (nb_variable == 2 ? marginal_distribution[1]->nb_value : 0)); + delete markov; + + completed_markov->markov_data = new VariableOrderMarkovData(*this , SEQUENCE_COPY , + (completed_markov->type == EQUILIBRIUM ? true : false)); + + seq = completed_markov->markov_data; + seq->state_variable_init(); + + if (order0) { + seq->build_transition_count(*completed_markov , true , true); + seq->order0_estimation(*completed_markov); + } + + else { + seq->build_transition_count(*completed_markov , true , + (((completed_markov->type == ORDINARY) && (global_initial_transition)) ? 
true : false)); + seq->chain_data->estimation(*completed_markov); + + if ((completed_markov->type == ORDINARY) && (global_initial_transition)) { + for (i = 1;i < completed_markov->nb_row;i++) { + if (completed_markov->memo_type[i] == NON_TERMINAL) { + for (j = 0;j < completed_markov->nb_state;j++) { + for (k = 0;k < completed_markov->nb_state;k++) { + seq->chain_data->transition[i][k] -= seq->chain_data->transition[completed_markov->child[i][j]][k]; + } + } + } + } + } + } + + if (completed_markov->type == EQUILIBRIUM) { + nb_terminal = (completed_markov->nb_row - 1) * (completed_markov->nb_state - 1) / + completed_markov->nb_state + 1; + + for (i = 1;i < completed_markov->nb_row;i++) { + if (!completed_markov->child[i]) { + completed_markov->initial[i] = 1. / (double)nb_terminal; + } + else { + completed_markov->initial[i] = 0.; + } + } + + completed_markov->initial_probability_computation(); + } + + // estimation of the categorical observation distributions + + if (completed_markov->nb_output_process == 1) { + seq->build_observation_frequency_distribution(completed_markov->nb_state); + + for (i = 0;i < completed_markov->nb_state;i++) { + seq->observation_distribution[1][i]->distribution_estimation(completed_markov->categorical_process[0]->observation[i]); + } + } + +# ifdef DEBUG + for (i = 1;i < completed_markov->nb_row;i++) { + for (j = completed_markov->max_order - 1;j >= completed_markov->order[i];j--) { + cout << " "; + } + for (j = completed_markov->order[i] - 1;j >= 0;j--) { + cout << completed_markov->state[i][j] << " "; + } + cout << " "; + for (j = 0;j < completed_markov->nb_state;j++) { + cout << seq->chain_data->transition[i][j] << " "; + } + cout << " | "; + for (j = 0;j < completed_markov->nb_state;j++) { + cout << completed_markov->transition[i][j] << " "; + } + cout << endl; + } +# endif + + // computation of the log-likelihood and the characteristic distributions of the model + + seq->likelihood = completed_markov->likelihood_computation(*seq); + 
+ if (os) { + *os << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << seq->likelihood + << " | " << completed_markov->likelihood_computation(*seq , I_DEFAULT) << endl; + } + + if (seq->likelihood == D_INF) { + delete completed_markov; + completed_markov = NULL; + error.update(STAT_error[STATR_ESTIMATION_FAILURE]); + } + + else { + completed_markov->component_computation(); + completed_markov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false); + } + } + + return completed_markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a variable-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] imarkov reference on a VariableOrderMarkov object, + * \param[in] global_initial_transition type of estimation of the initial transition probabilities (ordinary process case), + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return VariableOrderMarkov object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov* MarkovianSequences::variable_order_markov_estimation(StatError &error , + const VariableOrderMarkov &imarkov, + bool global_initial_transition , + bool counting_flag) const + +{ + bool status = true; + int i , j , k; + int nb_terminal; + VariableOrderMarkov *markov; + VariableOrderMarkovData *seq; + + + markov = NULL; + error.init(); + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if ((marginal_distribution[0]->nb_value < 2) || + (marginal_distribution[0]->nb_value > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + + else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (nb_variable > 1) { + if (nb_variable > 2) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , "1 or 2"); + } + + if ((type[1] != INT_VALUE) && (type[1] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (test_hidden(1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << SEQ_error[SEQR_OVERLAP]; + 
error.update((error_message.str()).c_str()); + } + + if (marginal_distribution[1]->nb_value > NB_STATE) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT]); + } + +/* if (!characteristics[1]) { + for (i = 0;i < marginal_distribution[1]->nb_value;i++) { + if (marginal_distribution[1]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_MISSING_VALUE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } */ + } + } + + if (imarkov.nb_output_process != nb_variable - 1) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + if (status) { + markov = new VariableOrderMarkov(imarkov , false); + + markov->markov_data = new VariableOrderMarkovData(*this , SEQUENCE_COPY , (markov->type == EQUILIBRIUM ? true : false)); + + seq = markov->markov_data; + seq->state_variable_init(); + seq->build_transition_count(*markov , true , + (((markov->type == ORDINARY) && (global_initial_transition)) ? true : false)); + seq->chain_data->estimation(*markov); + + if ((markov->type == ORDINARY) && (global_initial_transition)) { + for (i = 1;i < markov->nb_row;i++) { + if (markov->memo_type[i] == NON_TERMINAL) { + for (j = 0;j < markov->nb_state;j++) { + for (k = 0;k < markov->nb_state;k++) { + seq->chain_data->transition[i][k] -= seq->chain_data->transition[markov->child[i][j]][k]; + } + } + } + } + } + + if (markov->type == EQUILIBRIUM) { + nb_terminal = (markov->nb_row - 1) * (markov->nb_state - 1) / markov->nb_state + 1; + + for (i = 1;i < markov->nb_row;i++) { + if (!markov->child[i]) { + markov->initial[i] = 1. 
/ (double)nb_terminal; + } + else { + markov->initial[i] = 0.; + } + } + + markov->initial_probability_computation(); + } + + // estimation of the categorical observation distributions + + if (markov->nb_output_process == 1) { + seq->build_observation_frequency_distribution(markov->nb_state); + + for (i = 0;i < markov->nb_state;i++) { + seq->observation_distribution[1][i]->distribution_estimation(markov->categorical_process[0]->observation[i]); + } + } + + // computation of the log-likelihood and the characteristic distributions of the model + + seq->likelihood = markov->likelihood_computation(*seq); + +# ifdef MESSAGE + cout << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << seq->likelihood + << " | " << markov->likelihood_computation(*seq , I_DEFAULT) << endl; +# endif + + if (seq->likelihood == D_INF) { + delete markov; + markov = NULL; + error.update(STAT_error[STATR_ESTIMATION_FAILURE]); + } + + else { + markov->component_computation(); + markov->characteristic_computation(*seq , counting_flag , I_DEFAULT , false); + } + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Estimation of a fixed-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] type process type (ORDINARY/EQUILIBRIUM), + * \param[in] order Markov chain order, + * \param[in] global_initial_transition type of estimation of the initial transition probabilities (ordinary process case), + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return VariableOrderMarkov object. 
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov* MarkovianSequences::variable_order_markov_estimation(StatError &error , + process_type type , int order , + bool global_initial_transition , + bool counting_flag) const + +{ + bool status = true; + VariableOrderMarkov *imarkov , *markov; + + + markov = NULL; + error.init(); + + if ((order < 1) || (order > ORDER)) { + status = false; + error.update(SEQ_error[SEQR_ORDER]); + } + else { + if ((int)pow((double)marginal_distribution[0]->nb_value , order + 1) > NB_PARAMETER) { + status = false; + error.update(SEQ_error[SEQR_NB_PARAMETER]); + } + } + + if (status) { + imarkov = new VariableOrderMarkov(type , marginal_distribution[0]->nb_value , order , true , + nb_variable - 1 , (nb_variable == 2 ? marginal_distribution[1]->nb_value : 0)); + imarkov->build_previous_memory(); + + markov = variable_order_markov_estimation(error , *imarkov , global_initial_transition , + counting_flag); + delete imarkov; + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of transition counts. + * + * \param[in,out] os stream, + * \param[in] begin flag for taking account of the beginning of sequences. 
+ */ +/*--------------------------------------------------------------*/ + +ostream& VariableOrderMarkov::transition_count_ascii_write(ostream &os , bool begin) const + +{ + bool *bic_memory , *kt_memory; + int i , j , k; + int buff , max_memory_count , row , initial_count , *memory_count , *max_state , + *nb_parameter , *diff_nb_parameter , width[3]; + double standard_normal_value , half_confidence_interval , diff , max_abs_diff , child_likelihood , + child_krichevsky_trofimov , num , denom , *diff_count , *initial_likelihood , + *memory_likelihood , *max_likelihood , *krichevsky_trofimov , *max_krichevsky_trofimov , + **confidence_limit , **transition_likelihood , **diff_likelihood; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + memory_count = new int[nb_row]; + + if (begin) { + initial_count = 0; + for (i = 0;i < nb_state;i++) { + initial_count += markov_data->chain_data->initial[i]; + } + width[0] = column_width(initial_count); + + max_memory_count = 0; + } + + else { + width[0] = 0; + row = 0; + } + + for (i = (begin ? 
1 : 0);i < nb_row;i++) { + memory_count[i] = 0; + for (j = 0;j < nb_state;j++) { + memory_count[i] += markov_data->chain_data->transition[i][j]; + } + + if ((begin) && (memory_count[i] > max_memory_count)) { + max_memory_count = memory_count[i]; + row = i; + } + } + + buff = column_width(memory_count[row]); + if (buff > width[0]) { + width[0] = buff; + } + width[0] += ASCII_SPACE; + + // Peres-Shields fluctuation estimator + + diff_count = new double[nb_row]; + max_state = new int[nb_row]; + + diff_count[0] = memory_count[row]; + + for (i = 1;i < nb_row;i++) { + diff_count[i] = memory_count[row]; + if (((!begin) || (order[i] > 1)) && (memory_count[i] > 0)) { + max_abs_diff = 0.; + for (j = 0;j < nb_state;j++) { + diff = fabs(transition[parent[i]][j] * memory_count[i] - + markov_data->chain_data->transition[i][j]); +// if (diff > diff_count[i]) { +// diff_count[i] = diff; + if (diff > max_abs_diff) { + max_abs_diff = diff; + diff_count[i] = transition[parent[i]][j] * memory_count[i] - + markov_data->chain_data->transition[i][j]; + max_state[i] = j; + } + } + } + } + width[1] = column_width(nb_row - 1 , diff_count + 1) + ASCII_SPACE; + + if (begin) { + os << "\n" << SEQ_label[SEQL_INITIAL_COUNTS] << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[0]) << i; + } + os << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[0]) << markov_data->chain_data->initial[i]; + } + os << " " << setw(width[0]) << initial_count << endl; + } + +// os << "\nthreshold: " << pow((double)memory_count[0] , 0.75) << endl; + + os << "\n" << SEQ_label[SEQL_TRANSITION_COUNTS] << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[0]) << i; + } + os << setw(width[0]) << " " + << " " << SEQ_label[SEQL_MAX_TRANSITION_COUNT_DIFFERENCE]; + + for (i = (begin ? 
1 : 0);i <= max_order;i++) { + os << "\n"; + for (j = 0;j < nb_row;j++) { + if (order[j] == i) { + for (k = max_order - 1;k >= order[j];k--) { + os << " "; + } + for (k = order[j] - 1;k >= 0;k--) { + os << state[j][k] << " "; + } + os << " "; + + for (k = 0;k < nb_state;k++) { + os << setw(width[0]) << markov_data->chain_data->transition[j][k]; + } + os << " " << setw(width[0]) << memory_count[j]; + + if (diff_count[j] != memory_count[row]) { + os << setw(width[1]) << diff_count[j] << " (" << max_state[j] << ")"; + } + os << endl; + } + } + } + os << endl; + + // computation of column widths + + width[1] = (begin ? column_width(nb_state , initial) : 0); + + for (i = (begin ? 1 : 0);i < nb_row;i++) { + buff = column_width(nb_state , transition[i]); + if (buff > width[1]) { + width[1] = buff; + } + } + width[1] += ASCII_SPACE; + + if (begin) { + os << "\n" << STAT_word[STATW_INITIAL_PROBABILITIES] << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << i; + } + os << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << initial[i]; + } + os << endl; + } + + os << "\n" << STAT_word[STATW_TRANSITION_PROBABILITIES] << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << i; + } + + for (i = (begin ? 1 : 0);i <= max_order;i++) { + os << "\n"; + for (j = 0;j < nb_row;j++) { + if ((order[j] == i) && (memory_count[j] > 0)) { + for (k = max_order - 1;k >= order[j];k--) { + os << " "; + } + for (k = order[j] - 1;k >= 0;k--) { + os << state[j][k] << " "; + } + os << " "; + + for (k = 0;k < nb_state;k++) { + os << setw(width[1]) << transition[j][k]; + } + os << endl; + } + } + } + + // computation of confidence intervals on the transition probabilities + + confidence_limit = new double*[nb_row]; + for (i = (begin ? 
1 : 0);i < nb_row;i++) { + if (memory_count[i] > 0) { + confidence_limit[i] = new double[nb_state * 2]; + for (j = 0;j < nb_state * 2;j++) { + confidence_limit[i][j] = 0.; + } + } + } + + normal dist; + standard_normal_value = quantile(complement(dist , 0.025)); + + for (i = (begin ? 1 : 0);i < nb_row;i++) { + if (memory_count[i] > 0) { + for (j = 0;j < nb_state;j++) { + if ((transition[i][j] > 0.) && (transition[i][j] < 1.)) { + half_confidence_interval = standard_normal_value * + sqrt(transition[i][j] * (1. - transition[i][j]) / memory_count[i]); + confidence_limit[i][2 * j] = MAX(transition[i][j] - half_confidence_interval , 0.); + confidence_limit[i][2 * j + 1] = MIN(transition[i][j] + half_confidence_interval , 1.); + } + } + } + } + + // computation of column widths + + width[1] = 0; + for (i = (begin ? 1 : 0);i < nb_row;i++) { + if (memory_count[i] > 0) { + buff = column_width(2 * nb_state , confidence_limit[i]); + if (buff > width[1]) { + width[1] = buff; + } + } + } + width[1] += ASCII_SPACE; + + os << "\n" << SEQ_label[SEQL_TRANSITION_PROBABILITIY_CONFIDENCE_INTERVAL] << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << i + << setw(width[1]) << " "; + } + os << " " << SEQ_label[SEQL_COUNT]; + + for (i = (begin ? 1 : 0);i <= max_order;i++) { + os << "\n"; + for (j = 0;j < nb_row;j++) { + if ((order[j] == i) && (memory_count[j] > 0)) { + for (k = max_order - 1;k >= order[j];k--) { + os << " "; + } + for (k = order[j] - 1;k >= 0;k--) { + os << state[j][k] << " "; + } + os << " "; + + for (k = 0;k < nb_state;k++) { + if ((transition[j][k] > 0.) 
&& (transition[j][k] < 1.)) { + os << setw(width[1]) << confidence_limit[j][2 * k] + << setw(width[1]) << confidence_limit[j][2 * k + 1]; + } + else { + os << setw(width[1]) << " " + << setw(width[1]) << " "; + } + } + os << " " << setw(width[0]) << memory_count[j] << endl; + } + } + } + + // computation of log-likelihoods + + if (begin) { + initial_likelihood = new double[nb_state + 1]; + + initial_likelihood[nb_state] = 0.; + for (i = 0;i < nb_state;i++) { + if (markov_data->chain_data->initial[i] > 0) { + initial_likelihood[i] = markov_data->chain_data->initial[i] * log(initial[i]); + initial_likelihood[nb_state] += initial_likelihood[i]; + } + else { + initial_likelihood[i] = 0.; + } + } + } + + transition_likelihood = new double*[nb_row]; + memory_likelihood = new double[nb_row]; + krichevsky_trofimov = new double[nb_row]; + nb_parameter = new int[nb_row]; + + diff_likelihood = new double*[2]; + diff_likelihood[0] = new double[nb_row]; + diff_likelihood[1] = new double[nb_row]; + + max_likelihood = new double[nb_row]; + bic_memory = new bool[nb_row]; + max_krichevsky_trofimov = new double[nb_row]; + kt_memory = new bool[nb_row]; + diff_nb_parameter = new int[nb_row]; + + if (begin) { + memory_likelihood[0] = 0.; + krichevsky_trofimov[0] = 0.; + nb_parameter[0] = 0; + + diff_likelihood[0][0] = 0; + diff_likelihood[1][0] = 0; + max_likelihood[0] = 0; + max_krichevsky_trofimov[0] = 0; + } + + for (i = (begin ? 
1 : 0);i < nb_row;i++) { + transition_likelihood[i] = new double[nb_state]; + + memory_likelihood[i] = 0.; + nb_parameter[i] = 0; + for (j = 0;j < nb_state;j++) { + if (markov_data->chain_data->transition[i][j] > 0) { + nb_parameter[i]++; + transition_likelihood[i][j] = markov_data->chain_data->transition[i][j] * log(transition[i][j]); + memory_likelihood[i] += transition_likelihood[i][j]; + } + else { + transition_likelihood[i][j] = 0.; + } + } + + if (nb_parameter[i] > 0) { + nb_parameter[i]--; + } + + // Krichevsky-Trofimov estimator + + krichevsky_trofimov[i] = 0.; +// krichevsky_trofimov[i] = 1.; + if (memory_count[i] > 0) { + denom = memory_count[i] - 1. + (double)nb_state / 2.; + for (j = 0;j < nb_state;j++) { + if (markov_data->chain_data->transition[i][j] > 0) { + num = markov_data->chain_data->transition[i][j] - 0.5; + for (k = 0;k < markov_data->chain_data->transition[i][j];k++) { +// krichevsky_trofimov[i] *= num / denom; + krichevsky_trofimov[i] += log(num) - log(denom); + num--; + denom--; + } + } + } + } + } + + for (i = nb_row - 1;i >= (begin ? 
1 : 0);i--) { + max_likelihood[i] = 2 * memory_likelihood[i] - nb_parameter[i] * log((double)memory_count[0]); + max_krichevsky_trofimov[i] = krichevsky_trofimov[i]; + + if (order[i] == max_order) { + diff_likelihood[0][i] = 0.; + diff_likelihood[1][i] = 0.; + } + + else { + diff = -memory_likelihood[i]; + child_likelihood = 0.; + child_krichevsky_trofimov = 0.; + diff_nb_parameter[i] = -nb_parameter[i]; + for (j = 0;j < nb_state;j++) { + diff += memory_likelihood[child[i][j]]; + child_likelihood += max_likelihood[child[i][j]]; + child_krichevsky_trofimov += max_krichevsky_trofimov[child[i][j]]; + diff_nb_parameter[i] += nb_parameter[child[i][j]]; + } + diff_likelihood[0][i] = 2 * diff - diff_nb_parameter[i] * log((double)memory_count[0]); + diff_likelihood[1][i] = 2 * diff - diff_nb_parameter[i] * log((double)memory_count[i]); + + if (child_likelihood > max_likelihood[i]) { + max_likelihood[i] = child_likelihood; + bic_memory[i] = true; + } + else { + bic_memory[i] = false; + } + + if (child_krichevsky_trofimov > max_krichevsky_trofimov[i]) { + max_krichevsky_trofimov[i] = child_krichevsky_trofimov; + kt_memory[i] = true; + } + else { + kt_memory[i] = false; + } + } + } + + // computation of column widths + + if (begin) { + width[1] = column_width(nb_state + 1 , initial_likelihood); + } + else { + width[1] = 0; + } + + for (i = (begin ? 
1 : 0);i < nb_row;i++) { + buff = column_width(nb_state , transition_likelihood[i]); + if (buff > width[1]) { + width[1] = buff; + } + } + + buff = column_width(nb_row , memory_likelihood); + if (buff > width[1]) { + width[1] = buff; + } + buff = column_width(nb_row , krichevsky_trofimov); + if (buff > width[1]) { + width[1] = buff; + } + + buff = column_width(nb_row , diff_likelihood[0]); + if (buff > width[1]) { + width[1] = buff; + } + buff = column_width(nb_row , diff_likelihood[1]); + if (buff > width[1]) { + width[1] = buff; + } + buff = column_width(nb_row , max_likelihood); + if (buff > width[1]) { + width[1] = buff; + } + buff = column_width(nb_row , max_krichevsky_trofimov); + if (buff > width[1]) { + width[1] = buff; + } + width[1] += ASCII_SPACE; + + width[2] = column_width(nb_state - 1) + ASCII_SPACE; + + os << "\n\n" << SEQ_label[SEQL_LIKELIHOODS] << endl; + + if (begin) { + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << i; + } + os << endl; + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << initial_likelihood[i]; + } + os << " " << setw(width[1]) << initial_likelihood[nb_state] << "\n\n"; + } + + for (i = 0;i < max_order;i++) { + os << " "; + } + os << " "; + + for (i = 0;i < nb_state;i++) { + os << setw(width[1]) << i; + } + os << setw(width[1]) << " " + << " " << SEQ_label[SEQL_COUNT] + << " " << STAT_label[STATL_FREE_PARAMETERS]; + if (!begin) { +/* os << " " << STAT_criterion_word[BIC] << " " << STAT_criterion_word[BICc] + << " " << SEQ_label[SEQL_KRICHEVSKY_TROFIMOV]; */ + + os << " " << SEQ_label[SEQL_DELTA] << " " << STAT_label[STATL_FREE_PARAMETERS] + << " " << SEQ_label[SEQL_DELTA] << " " << STAT_criterion_word[BIC] + << " " << SEQ_label[SEQL_DELTA] << " " << STAT_criterion_word[BICc] + << " " << SEQ_label[SEQL_DELTA] << " " << STAT_criterion_word[BIC] + << " " << SEQ_label[SEQL_DELTA] << " " 
<< SEQ_label[SEQL_KRICHEVSKY_TROFIMOV]; + } + os << endl; + + for (i = (begin ? 1 : 0);i <= max_order;i++) { + os << "\n"; + for (j = 0;j < nb_row;j++) { + if (order[j] == i) { + for (k = max_order - 1;k >= order[j];k--) { + os << " "; + } + for (k = order[j] - 1;k >= 0;k--) { + os << state[j][k] << " "; + } + os << " "; + + for (k = 0;k < nb_state;k++) { + os << setw(width[1]) << transition_likelihood[j][k]; + } + + os << " " << setw(width[1]) << memory_likelihood[j] + << " " << setw(width[0]) << memory_count[j] + << " " << setw(width[2]) << nb_parameter[j]; + +/* if (!begin) { + os << " " << setw(width[1]) << 2 * memory_likelihood[j] - + nb_parameter[j] * log((double)memory_count[0]); + + if (memory_count[parent[j]] > 0) { + os << " " << setw(width[1]) << 2 * memory_likelihood[j] - + nb_parameter[j] * log((double)memory_count[parent[j]]); + } + else { + os << " " << setw(width[1]) << 0; + } + + os << " " << setw(width[1]) << krichevsky_trofimov[j]; + } */ + + if ((!begin) && (order[j] < max_order)) { + os << " " << setw(width[2]) << diff_nb_parameter[j] + << " " << setw(width[1]) << diff_likelihood[0][j]; + if (memory_count[j] > 0) { + os << setw(width[1]) << diff_likelihood[1][j]; + } + else { + os << setw(width[1]) << 0; + } + os << " " << setw(width[1]) << max_likelihood[j] << " " << bic_memory[j] + << setw(width[1]) << max_krichevsky_trofimov[j] << " " << kt_memory[j]; + } + + os << endl; + } + } + } + + delete [] diff_count; + delete [] max_state; + + for (i = (begin ? 1 : 0);i < nb_row;i++) { + if (memory_count[i] > 0) { + delete [] confidence_limit[i]; + } + } + delete [] confidence_limit; + + delete [] memory_count; + + if (begin) { + delete [] initial_likelihood; + } + + for (i = (begin ? 
1 : 0);i < nb_row;i++) { + delete [] transition_likelihood[i]; + } + delete [] transition_likelihood; + + delete [] memory_likelihood; + delete [] krichevsky_trofimov; + delete [] nb_parameter; + + delete [] diff_likelihood[0]; + delete [] diff_likelihood[1]; + delete [] diff_likelihood; + + delete [] max_likelihood; + delete [] bic_memory; + delete [] max_krichevsky_trofimov; + delete [] kt_memory; + delete [] diff_nb_parameter; + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Counting of transitions for successive orders. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying transition counts, + * \param[in] max_order maximum order, + * \param[in] begin flag for taking account of the beginning of sequences, + * \param[in] estimator estimator (maximum likelihood, Laplace, adaptative Laplace), + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::transition_count(StatError &error , ostream *os , int max_order , + bool begin , transition_estimator estimator , + const string path) const + +{ + bool status = true; + int i; + VariableOrderMarkov *markov; + VariableOrderMarkovData *seq; + + + error.init(); + + if (nb_variable > 1) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , 1); + } + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if ((marginal_distribution[0]->nb_value < 2) || + (marginal_distribution[0]->nb_value > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + + else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + } + + if ((max_order < 1) || (max_order > ORDER)) { + status = false; + error.update(SEQ_error[SEQR_ORDER]); + } + + if (status) { + markov = new VariableOrderMarkov(ORDINARY , marginal_distribution[0]->nb_value , max_order , true); + markov->build_previous_memory(); + + markov->markov_data = new VariableOrderMarkovData(*this , SEQUENCE_COPY , false); + + seq = markov->markov_data; + seq->state_variable_init(); + seq->build_transition_count(*markov , begin , !begin); + seq->chain_data->estimation(*markov , true , estimator); + + if (os) { + markov->transition_count_ascii_write(*os , begin); + } + + if (!path.empty()) { + ofstream out_file(path.c_str()); + + if (!out_file) { + status = false; + 
error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + markov->transition_count_ascii_write(out_file , begin); + } + } + + delete markov; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the results of a comparison of models for a sample of sequences. + * + * \param[in,out] os stream, + * \param[in] nb_model number of models, + * \param[in] likelihood log-likelihoods, + * \param[in] label model label, + * \param[in] exhaustive flag detail level, + * \param[in] algorithm type of algorithm (NO_LATENT_STRUCTURE/FORWARD/VITERBI). + */ +/*--------------------------------------------------------------*/ + +ostream& MarkovianSequences::likelihood_write(ostream &os , int nb_model , double **likelihood , + const char *label , bool exhaustive , + latent_structure_algorithm algorithm) const + +{ + bool *status; + int i , j , k , m; + int buff , model , min , width[3] , *rank_cumul , **rank; + double max_likelihood , likelihood_cumul; + ios_base::fmtflags format_flags; + + + format_flags = os.setf(ios::right , ios::adjustfield); + + // computation of column widths + + if (exhaustive) { + width[0] = column_width(nb_sequence); + + width[1] = 0; + for (i = 0;i < nb_sequence;i++) { + buff = column_width(nb_model , likelihood[i]); + if (buff > width[1]) { + width[1] = buff; + } + +/* for (j = 0;j < nb_model;j++) { + buff = column_width(1 , likelihood[i] + j , + (likelihood[i][j] == D_INF ? 1. : 1. 
/ length[i])); + if (buff > width[1]) { + width[1] = buff; + } + } */ + } + width[1] += ASCII_SPACE; + + width[2] = column_width(nb_model) + ASCII_SPACE; + + // writing of the matrix of log-likelihoods of each model for each sequence + + os << " "; + for (i = 0;i < nb_model;i++) { + os << " | " << label << " " << i + 1; + } + + if (nb_model == 2) { + os << " | " << SEQ_label[SEQL_LIKELIHOOD_RATIO]; + } + os << endl; + + for (i = 0;i < nb_sequence;i++) { + os << SEQ_label[SEQL_SEQUENCE] << " "; + os << setw(width[0]) << i + 1 << ":"; + for (j = 0;j < nb_model;j++) { +// os << setw(width[1]) << (likelihood[i][j] == D_INF ? likelihood[i][j] : likelihood[i][j] / length[i]); + os << setw(width[1]) << likelihood[i][j]; + } + + max_likelihood = likelihood[i][0]; + model = 0; + for (j = 1;j < nb_model;j++) { + if (likelihood[i][j] > max_likelihood) { + model = j; + max_likelihood = likelihood[i][j]; + } + } + os << setw(width[2]) << model + 1; + + if (nb_model == 2) { + if (likelihood[i][0] > likelihood[i][1]) { + os << " " << exp(likelihood[i][1] - likelihood[i][0]); + } + else { + os << " " << exp(likelihood[i][0] - likelihood[i][1]); + } + } + os << endl; + } + os << endl; + } + + // extraction of model ranks for each sequence + + rank = new int*[nb_model]; + for (i = 0;i < nb_model;i++) { + rank[i] = new int[nb_model + 1]; + for (j = 0;j < nb_model;j++) { + rank[i][j] = 0; + } + } + + status = new bool[nb_model]; + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_model;j++) { + status[j] = true; + } + + j = 0; + while (j < nb_model) { + max_likelihood = 2 * D_INF; + for (k = 0;k < nb_model;k++) { + if ((status[k]) && (likelihood[i][k] > max_likelihood)) { + model = k; + max_likelihood = likelihood[i][k]; + } + } + status[model] = false; + rank[model][j]++; + + m = 1; + for (k = 0;k < nb_model;k++) { + if ((status[k]) && (likelihood[i][k] == max_likelihood)) { + status[k] = false; + rank[k][j]++; + m++; + } + } + + j += m; + } + } + + // extraction of model ranks 
for all the sequences + + rank_cumul = new int[nb_model]; + + for (i = 0;i < nb_model;i++) { + status[i] = true; + rank_cumul[i] = 0; + for (j = 0;j < nb_model;j++) { + rank_cumul[i] += rank[i][j] * (j + 1); + } + } + + i = 0; + while (i < nb_model) { + min = nb_model * nb_sequence + 1; + for (j = 0;j < nb_model;j++) { + if ((status[j]) && (rank_cumul[j] < min)) { + model = j; + min = rank_cumul[j]; + } + } + status[model] = false; + rank[model][nb_model] = i + 1; + + k = 1; + for (j = 0;j < nb_model;j++) { + if ((status[j]) && (rank_cumul[j] == min)) { + status[j] = false; + rank[j][nb_model] = i + 1; + k++; + } + } + + i += k; + } + + width[0] = column_width(nb_model); + width[1] = column_width(nb_sequence) + ASCII_SPACE; + + for (i = 0;i < nb_model;i++) { + os << label << " "; + os << setw(width[0]) << i + 1 << ":"; + for (j = 0;j < nb_model;j++) { + os << setw(width[1]) << rank[i][j]; + } + os << " (" << rank[i][nb_model] << ")"; + + // computation of the log-likelihood of a model for the sequences + + if (exhaustive) { + likelihood_cumul = 0.; + for (j = 0;j < nb_sequence;j++) { + if (likelihood[j][i] != D_INF) { + likelihood_cumul += likelihood[j][i]; + } + else { + likelihood_cumul = D_INF; + break; + } + } + + switch (algorithm) { + case NO_LATENT_STRUCTURE : + os << " | " << STAT_label[STATL_LIKELIHOOD]; + break; + case FORWARD : + os << " | " << SEQ_label[SEQL_OBSERVED_SEQUENCES_LIKELIHOOD]; + break; + case VITERBI : + os << " | " << SEQ_label[SEQL_STATE_SEQUENCES_LIKELIHOOD]; + break; + } +/* if (likelihood_cumul != D_INF) { + os << ": " << likelihood_cumul / cumul_length; + } + else { */ + os << ": " << likelihood_cumul; +// } + } + os << endl; + } + os << endl; + + for (i = 0;i < nb_model;i++) { + delete [] rank[i]; + } + delete [] rank; + + delete [] status; + delete [] rank_cumul; + + os.setf(format_flags , ios::adjustfield); + + return os; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Writing of the 
results of a comparison of models for a sample of sequences in a file. + * + * \param[in] error reference on a StatError object, + * \param[in] path file path, + * \param[in] nb_model number of models, + * \param[in] likelihood log-likelihoods, + * \param[in] label model label, + * \param[in] algorithm type of algorithm (NO_LATENT_STRUCTURE/FORWARD/VITERBI). + * + * \return error status. + */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::likelihood_write(StatError &error , const string path , + int nb_model , double **likelihood , const char *label , + latent_structure_algorithm algorithm) const + +{ + bool status; + ofstream out_file(path.c_str()); + + + error.init(); + + if (!out_file) { + status = false; + error.update(STAT_error[STATR_FILE_NAME]); + } + + else { + status = true; + likelihood_write(out_file , nb_model , likelihood , label , true , algorithm); + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Comparison of variable-order Markov chains for a sample of sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the results of model comparison, + * \param[in] nb_model number of variable-order Markov chains, + * \param[in] imarkov pointer on VariableOrderMarkov objects, + * \param[in] path file path. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::comparison(StatError &error , ostream *os , int nb_model , + const VariableOrderMarkov **imarkov , + const string path) const + +{ + bool status = true; + int i , j; + double **likelihood; + + + error.init(); + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + + if (nb_variable > 1) { + if (nb_variable > 2) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , "1 or 2"); + } + + if ((type[1] != INT_VALUE) && (type[1] != STATE)) { + status = false; + ostringstream error_message , correction_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << STAT_error[STATR_VARIABLE_TYPE]; + correction_message << STAT_variable_word[INT_VALUE] << " or " + << STAT_variable_word[STATE]; + error.correction_update((error_message.str()).c_str() , (correction_message.str()).c_str()); + } + + else { + if (test_hidden(1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << SEQ_error[SEQR_OVERLAP]; + error.update((error_message.str()).c_str()); + } + + if (!characteristics[1]) { + for (i = 0;i < marginal_distribution[1]->nb_value;i++) { + if (marginal_distribution[1]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_VARIABLE] << " " << 2 << ": " + << 
STAT_error[STATR_MISSING_VALUE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + } + } + + for (i = 0;i < nb_model;i++) { + if (imarkov[i]->nb_output_process + 1 != nb_variable) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT_PROCESS]; + error.update((error_message.str()).c_str()); + } + + else { + if (imarkov[i]->state_process->nb_value < marginal_distribution[0]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 1 << ": " + << SEQ_error[SEQR_NB_STATE]; + error.update((error_message.str()).c_str()); + } + + if (nb_variable == 2) { + if (imarkov[i]->categorical_process[0]->nb_value < marginal_distribution[1]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 1 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + } + + if (status) { + likelihood = new double*[nb_sequence]; + for (i = 0;i < nb_sequence;i++) { + likelihood[i] = new double[nb_model]; + } + + // for each sequence, computation of the log-likelihood for each model + + for (i = 0;i < nb_sequence;i++) { + for (j = 0;j < nb_model;j++) { + likelihood[i][j] = imarkov[j]->likelihood_computation(*this , i); + } + } + + if (os) { + likelihood_write(*os , nb_model , likelihood , SEQ_label[SEQL_MARKOV_CHAIN] , true); + } + if (!path.empty()) { + status = likelihood_write(error , path , nb_model , likelihood , SEQ_label[SEQL_MARKOV_CHAIN]); + } + + for (i = 0;i < nb_sequence;i++) { + delete [] likelihood[i]; + } + delete [] likelihood; + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a variable-order Markov chain. 
+ *
+ *  The sequence lengths are given by a frequency distribution. A state sequence is generated
+ *  for each sequence and, for each output process of the model, an output value is generated
+ *  at each position according to the observation distribution attached to the current state.
+ *
+ *  \param[in] error               reference on a StatError object,
+ *  \param[in] length_distribution sequence length frequency distribution,
+ *  \param[in] counting_flag       flag on the computation of the counting distributions
+ *                                 (forced to false if the maximum length exceeds COUNTING_MAX_LENGTH),
+ *  \param[in] divergence_flag     flag on the computation of a Kullback-Leibler divergence
+ *                                 (if true, the model characteristics, the log-likelihood and
+ *                                 the mixture weights are not computed).
+ *
+ *  \return                        VariableOrderMarkovData (NULL if the length frequency distribution is rejected).
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkovData* VariableOrderMarkov::simulation(StatError &error ,
+                                                         const FrequencyDistribution &length_distribution ,
+                                                         bool counting_flag ,
+                                                         bool divergence_flag) const
+
+{
+  bool status = true , hidden;
+  int i , j , k;
+  int memory , cumul_length , *decimal_scale , *pstate , **pioutput;
+  variable_nature *itype;
+  double buff , min_location , likelihood , **proutput;
+  Distribution *weight , *restoration_weight;
+  VariableOrderMarkov *markov;
+  VariableOrderMarkovData *seq;
+
+
+  seq = NULL;
+  error.init();
+
+  // validation of the number of sequences and of the minimum and maximum sequence lengths
+
+  if ((length_distribution.nb_element < 1) || (length_distribution.nb_element > NB_SEQUENCE)) {
+    status = false;
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+  }
+  if (length_distribution.offset < 2) {
+    status = false;
+    error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]);
+  }
+  if (length_distribution.nb_value - 1 > MAX_LENGTH) {
+    status = false;
+    error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]);
+  }
+
+  // validation of the cumulative sequence length
+
+  if (status) {
+    cumul_length = 0;
+    for (i = length_distribution.offset;i < length_distribution.nb_value;i++) {
+      cumul_length += i * length_distribution.frequency[i];
+    }
+
+    if (cumul_length > CUMUL_LENGTH) {
+      status = false;
+      error.update(SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]);
+    }
+  }
+
+  if (status) {
+    if (length_distribution.nb_value - 1 > COUNTING_MAX_LENGTH) {
+      counting_flag = false;
+    }
+    hidden = CategoricalSequenceProcess::test_hidden(nb_output_process , categorical_process);
+
+    // initializations
+
+    // variable 0 is the state variable; each output process gives an integer-valued variable
+    // unless it is a continuous parametric process (real-valued variable)
+
+    itype = new variable_nature[nb_output_process + 1];
+
+    itype[0] = STATE;
+    for (i = 0;i < nb_output_process;i++) {
+      if (!continuous_parametric_process[i]) {
+        itype[i + 1] = INT_VALUE;
+      }
+      else {
+        itype[i + 1] = REAL_VALUE;
+      }
+    }
+
+    seq = new VariableOrderMarkovData(length_distribution , nb_output_process + 1 , itype);
+    delete [] itype;
+
+    // the simulation is performed with a copy of the model attached to the generated sequences
+
+    seq->markov = new VariableOrderMarkov(*this , false);
+
+    markov = seq->markov;
+    markov->create_cumul();
+    markov->cumul_computation();
+
+    if (markov->nb_output_process > 0) {
+      pioutput = new int*[markov->nb_output_process];
+      proutput = new double*[markov->nb_output_process];
+
+      // decimal_scale[i]: scale used below for rounding the real values simulated for process i
+      // NOTE(review): decimal_scale[i] is only assigned for the GAUSSIAN and VON_MISES cases;
+      // any other continuous observation distribution would use an unset scale - confirm
+      // that no other value of ident can occur here.
+
+      decimal_scale = new int[markov->nb_output_process];
+
+      for (i = 0;i < markov->nb_output_process;i++) {
+        if (markov->continuous_parametric_process[i]) {
+          switch (markov->continuous_parametric_process[i]->ident) {
+
+          case GAUSSIAN : {
+
+            // the scale is deduced from the smallest absolute location parameter over the states
+            // NOTE(review): log(min_location) is not finite if a location parameter is 0 - confirm
+            // how this case should be handled.
+
+            min_location = fabs(markov->continuous_parametric_process[i]->observation[0]->location);
+            for (j = 1;j < markov->nb_state;j++) {
+              buff = fabs(markov->continuous_parametric_process[i]->observation[j]->location);
+              if (buff < min_location) {
+                min_location = buff;
+              }
+            }
+
+            buff = (int)ceil(log(min_location) / log(10));
+            if (buff < GAUSSIAN_MAX_NB_DECIMAL) {
+              decimal_scale[i] = pow(10 , (GAUSSIAN_MAX_NB_DECIMAL - buff));
+            }
+            else {
+              decimal_scale[i] = 1;
+            }
+
+# ifdef MESSAGE
+            cout << "\nScale: " << i + 1 << " " << decimal_scale[i] << endl;
+# endif
+
+            break;
+          }
+
+          case VON_MISES : {
+
+            // the scale depends on the angle unit; the cumulative distribution functions
+            // computed here are deleted after the generation of the sequences
+
+            switch (markov->continuous_parametric_process[i]->unit) {
+            case DEGREE :
+              decimal_scale[i] = DEGREE_DECIMAL_SCALE;
+              break;
+            case RADIAN :
+              decimal_scale[i] = RADIAN_DECIMAL_SCALE;
+              break;
+            }
+
+            for (j = 0;j < markov->nb_state;j++) {
+              markov->continuous_parametric_process[i]->observation[j]->von_mises_cumul_computation();
+            }
+            break;
+          }
+          }
+        }
+      }
+    }
+
+    // generation of the sequences
+
+    for (i = 0;i < seq->nb_sequence;i++) {
+
+      // pointers on the first position of the state sequence and of each output sequence
+
+      pstate = seq->int_sequence[i][0];
+
+      for (j = 0;j < markov->nb_output_process;j++) {
+        switch (seq->type[j + 1]) {
+        case INT_VALUE :
+          pioutput[j] = seq->int_sequence[i][j + 1];
+          break;
+        case REAL_VALUE :
+          proutput[j] = seq->real_sequence[i][j + 1];
+          break;
+        }
+      }
+
+      // first position: the initial state is drawn (ORDINARY) or the initial memory is drawn and
+      // the state is read from it (EQUILIBRIUM)
+
+      switch (markov->type) {
+      case ORDINARY :
+        *pstate = cumul_method(markov->nb_state , markov->cumul_initial);
+        memory = markov->child[0][*pstate];
+        break;
+      case EQUILIBRIUM :
+        memory = cumul_method(markov->nb_row , markov->cumul_initial);
+        *pstate = markov->state[memory][0];
+        break;
+      }
+
+      for (j = 0;j < markov->nb_output_process;j++) {
+        if (markov->categorical_process[j]) {
+          *pioutput[j] = markov->categorical_process[j]->observation[*pstate]->simulation();
+        }
+        else if (markov->discrete_parametric_process[j]) {
+          *pioutput[j] = markov->discrete_parametric_process[j]->observation[*pstate]->simulation();
+        }
+        else {
+          *proutput[j] = round(markov->continuous_parametric_process[j]->observation[*pstate]->simulation() * decimal_scale[j]) / decimal_scale[j];
+        }
+      }
+
+      // following positions: the next state is drawn given the current memory,
+      // the outputs are drawn given the state, then the memory is updated
+
+      for (j = 1;j < seq->length[i];j++) {
+        *++pstate = cumul_method(markov->nb_state , markov->cumul_transition[memory]);
+
+        for (k = 0;k < markov->nb_output_process;k++) {
+          if (markov->categorical_process[k]) {
+            *++pioutput[k] = markov->categorical_process[k]->observation[*pstate]->simulation();
+          }
+          else if (markov->discrete_parametric_process[k]) {
+            *++pioutput[k] = markov->discrete_parametric_process[k]->observation[*pstate]->simulation();
+          }
+          else {
+            *++proutput[k] = round(markov->continuous_parametric_process[k]->observation[*pstate]->simulation() * decimal_scale[k]) / decimal_scale[k];
+          }
+        }
+
+        memory = markov->next[memory][*pstate];
+      }
+    }
+
+    // release of the working data structures used for the generation
+
+    markov->remove_cumul();
+
+    if (markov->nb_output_process > 0) {
+      delete [] pioutput;
+      delete [] proutput;
+
+      delete [] decimal_scale;
+
+      for (i = 0;i < markov->nb_output_process;i++) {
+        if ((markov->continuous_parametric_process[i]) &&
+            (markov->continuous_parametric_process[i]->ident == VON_MISES)) {
+          for (j = 0;j < markov->nb_state;j++) {
+            delete [] markov->continuous_parametric_process[i]->observation[j]->cumul;
+            markov->continuous_parametric_process[i]->observation[j]->cumul = NULL;
+          }
+        }
+      }
+    }
+
+    // computation of the characteristics of the generated sequences
+
+    // the range of the state variable is set from the number of states of the model
+    // rather than from the generated values
+
+    seq->min_value[0] = 0;
+    seq->max_value[0] = nb_state - 1;
+    seq->build_marginal_frequency_distribution(0);
+
+    for (i = 1;i < seq->nb_variable;i++) {
+      seq->min_value_computation(i);
+      seq->max_value_computation(i);
+
+      seq->build_marginal_frequency_distribution(i);
+      seq->min_interval_computation(i);
+    }
+
+    seq->build_transition_count(*markov);
+    seq->build_observation_frequency_distribution(nb_state);
+    seq->build_observation_histogram(nb_state);
+    seq->build_characteristic();
+
+/*    if ((seq->max_value[0] < nb_state - 1) || (!(seq->characteristics[0]))) {
+      delete seq;
+      seq = NULL;
+      error.update(SEQ_error[SEQR_STATES_NOT_REPRESENTED]);
+    }
+
+    else if (!divergence_flag) { */
+    if (!divergence_flag) {
+      markov->characteristic_computation(*seq , counting_flag);
+
+      // computation of the log-likelihood of the model for the generated sequences
+
+      likelihood = markov->likelihood_computation(*seq);
+
+      // if the first computation returns D_INF, the computation is done again with
+      // I_DEFAULT as second argument
+
+      if (likelihood == D_INF) {
+        likelihood = markov->likelihood_computation(*seq , I_DEFAULT);
+      }
+
+# ifdef DEBUG
+      else {
+        cout << "\n" << STAT_label[STATL_LIKELIHOOD] << ": " << likelihood
+             << " | " << markov->likelihood_computation(*seq , I_DEFAULT) << endl;
+      }
+# endif
+
+      // the log-likelihood is stored as the restoration likelihood for a hidden model
+
+      if (hidden) {
+        seq->restoration_likelihood = likelihood;
+      }
+      else {
+        seq->likelihood = likelihood;
+      }
+
+      // computation of the mixtures of observation distributions (theoretical weights and weights deduced from the restoration)
+
+      if (hidden) {
+        weight = markov->state_process->weight_computation();
+        restoration_weight = seq->weight_computation();
+
+        // each output process receives its own copies of the two weight distributions;
+        // no mixture is computed for the continuous parametric processes
+
+        for (i = 0;i < markov->nb_output_process;i++) {
+          if (markov->categorical_process[i]) {
+            delete markov->categorical_process[i]->weight;
+            delete markov->categorical_process[i]->mixture;
+            markov->categorical_process[i]->weight = new Distribution(*weight);
+            markov->categorical_process[i]->mixture = markov->categorical_process[i]->mixture_computation(markov->categorical_process[i]->weight);
+
+            delete markov->categorical_process[i]->restoration_weight;
+            delete markov->categorical_process[i]->restoration_mixture;
+            markov->categorical_process[i]->restoration_weight = new Distribution(*restoration_weight);
+            markov->categorical_process[i]->restoration_mixture = markov->categorical_process[i]->mixture_computation(markov->categorical_process[i]->restoration_weight);
+          }
+
+          else if (markov->discrete_parametric_process[i]) {
+            delete markov->discrete_parametric_process[i]->weight;
+            delete markov->discrete_parametric_process[i]->mixture;
+            markov->discrete_parametric_process[i]->weight = new Distribution(*weight);
+            markov->discrete_parametric_process[i]->mixture = markov->discrete_parametric_process[i]->mixture_computation(markov->discrete_parametric_process[i]->weight);
+
+            delete markov->discrete_parametric_process[i]->restoration_weight;
+            delete markov->discrete_parametric_process[i]->restoration_mixture;
+            markov->discrete_parametric_process[i]->restoration_weight = new Distribution(*restoration_weight);
+            markov->discrete_parametric_process[i]->restoration_mixture = markov->discrete_parametric_process[i]->mixture_computation(markov->discrete_parametric_process[i]->restoration_weight);
+          }
+
+          else if (markov->continuous_parametric_process[i]) {
+            delete markov->continuous_parametric_process[i]->weight;
+            markov->continuous_parametric_process[i]->weight = new Distribution(*weight);
+
+            delete markov->continuous_parametric_process[i]->restoration_weight;
+            markov->continuous_parametric_process[i]->restoration_weight = new Distribution(*restoration_weight);
+          }
+        }
+
+        delete weight;
+        delete restoration_weight;
+      }
+    }
+  }
+
+  return seq;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Simulation using a variable-order Markov chain.
+ * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of sequences, + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return VariableOrderMarkovData object. + */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkovData* VariableOrderMarkov::simulation(StatError &error , + int nb_sequence , int length , + bool counting_flag) const + +{ + bool status = true; + VariableOrderMarkovData *seq; + + + seq = NULL; + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + status = false; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + if (length < 2) { + status = false; + error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]); + } + if (length > MAX_LENGTH) { + status = false; + error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]); + } + + if (status) { + FrequencyDistribution length_distribution(length + 1); + + length_distribution.nb_element = nb_sequence; + length_distribution.offset = length; + length_distribution.max = nb_sequence; + length_distribution.mean = length; + length_distribution.variance = 0.; + length_distribution.frequency[length] = nb_sequence; + + seq = simulation(error , length_distribution , counting_flag); + } + + return seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a variable-order Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] nb_sequence number of sequences, + * \param[in] iseq reference on a MarkovianSequences object, + * \param[in] counting_flag flag on the computation of the counting distributions. + * + * \return VariableOrderMarkovData object. 
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkovData* VariableOrderMarkov::simulation(StatError &error , int nb_sequence ,
+                                                         const MarkovianSequences &iseq ,
+                                                         bool counting_flag) const
+
+{
+  error.init();
+
+  if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) {
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+    return NULL;
+  }
+
+  // the sequence length frequency distribution of the reference sequences is rescaled
+  // to the requested number of sequences; the rescaled distribution is only needed
+  // during the simulation
+
+  FrequencyDistribution *scaled_length = iseq.length_distribution->frequency_scale(nb_sequence);
+
+  VariableOrderMarkovData *simulated = simulation(error , *scaled_length , counting_flag);
+  delete scaled_length;
+
+  return simulated;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of Kullback-Leibler divergences between variable-order Markov chains.
+ *
+ *  \param[in] error               reference on a StatError object,
+ *  \param[in] os                  stream for displaying the matrix of pairwise distances between models,
+ *  \param[in] nb_model            number of variable-order Markov chains,
+ *  \param[in] imarkov             pointer on VariableOrderMarkov objects,
+ *  \param[in] length_distribution sequence length frequency distribution,
+ *  \param[in] path                file path.
+ *
+ *  \return                        DistanceMatrix object.
+ */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* VariableOrderMarkov::divergence_computation(StatError &error , ostream *os , int nb_model , + const VariableOrderMarkov **imarkov , + FrequencyDistribution **length_distribution , + const string path) const + +{ + bool status = true , lstatus; + int i , j , k; + int cumul_length , nb_failure; + double **likelihood; + long double divergence; + const VariableOrderMarkov **markov; + MarkovianSequences *iseq , *seq; + VariableOrderMarkovData *simul_seq; + DistanceMatrix *dist_matrix; + ofstream *out_file; + + + dist_matrix = NULL; + error.init(); + + for (i = 0;i < nb_model - 1;i++) { + if (imarkov[i]->type != type) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_MODEL_TYPE]; + error.update((error_message.str()).c_str()); + } + + if (imarkov[i]->nb_output_process == nb_output_process) { + if (imarkov[i]->nb_state != nb_state) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_NB_STATE]; + error.update((error_message.str()).c_str()); + } + + if (nb_output_process == 1) { + if (imarkov[i]->categorical_process[0]->nb_value != categorical_process[0]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + } + + else if ((nb_output_process == 0) && (imarkov[i]->nb_output_process == 1)) { + if (imarkov[i]->categorical_process[0]->nb_value != nb_state) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 2 << ": " + << STAT_error[STATR_NB_OUTPUT]; + error.update((error_message.str()).c_str()); + } + } + + else { // if ((nb_output_process == 1) && (imarkov[i]->nb_output_process 
== 0)) + if (imarkov[i]->nb_state != categorical_process[0]->nb_value) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 2 << ": " + << SEQ_error[SEQR_NB_STATE]; + error.update((error_message.str()).c_str()); + } + } + } + + for (i = 0;i < nb_model;i++) { + lstatus = true; + + if ((length_distribution[i]->nb_element < 1) || (length_distribution[i]->nb_element > NB_SEQUENCE)) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_NB_SEQUENCE]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->offset < 2) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + if (length_distribution[i]->nb_value - 1 > MAX_LENGTH) { + lstatus = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + + if (!lstatus) { + status = false; + } + + else { + cumul_length = 0; + for (j = length_distribution[i]->offset;j < length_distribution[i]->nb_value;j++) { + cumul_length += j * length_distribution[i]->frequency[j]; + } + + if (cumul_length > CUMUL_LENGTH) { + status = false; + ostringstream error_message; + error_message << SEQ_label[SEQL_SEQUENCE_LENGTH] << " " << STAT_label[STATL_FREQUENCY_DISTRIBUTION] << " " + << i + 1 << ": " << SEQ_error[SEQR_CUMUL_SEQUENCE_LENGTH]; + error.update((error_message.str()).c_str()); + } + } + } + + if (status) { + out_file = NULL; + + if (!path.empty()) { + out_file = new ofstream(path.c_str()); + + if 
(!out_file) { + error.update(STAT_error[STATR_FILE_NAME]); + if (os) { + *os << error; + } + } + } + + markov = new const VariableOrderMarkov*[nb_model]; + + markov[0] = this; + for (i = 1;i < nb_model;i++) { + markov[i] = imarkov[i - 1]; + } + + dist_matrix = new DistanceMatrix(nb_model , SEQ_label[SEQL_MARKOV_CHAIN]); + + for (i = 0;i < nb_model;i++) { + + // generation of a sample of sequences using a variable-order Markov chain + + simul_seq = markov[i]->simulation(error , *length_distribution[i] , false , true); + + likelihood = new double*[simul_seq->nb_sequence]; + for (j = 0;j < simul_seq->nb_sequence;j++) { + likelihood[j] = new double[nb_model]; + } + + for (j = 0;j < simul_seq->nb_sequence;j++) { + likelihood[j][i] = markov[i]->likelihood_computation(*simul_seq , j); + + if ((os) && (likelihood[j][i] == D_INF)) { + *os << "\nERROR - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << endl; + } + } + + if (markov[i]->nb_output_process == 1) { + iseq = simul_seq->remove_variable_1(); + } + else { + iseq = simul_seq; + } + + // computation of the log-likelihood of each variable-order Markov chain for the sample of sequences + + for (j = 0;j < nb_model;j++) { + if (j != i) { + if (markov[j]->nb_output_process == 1) { + seq = iseq->transcode(error , markov[j]->categorical_process[0]); + } + else { + seq = iseq; + } + + divergence = 0.; + cumul_length = 0; + nb_failure = 0; + + for (k = 0;k < seq->nb_sequence;k++) { + likelihood[k][j] = markov[j]->likelihood_computation(*seq , k); + +// if (divergence != -D_INF) { + if (likelihood[k][j] != D_INF) { + divergence += likelihood[k][i] - likelihood[k][j]; + cumul_length += seq->length[k]; + } + else { + nb_failure++; +// divergence = -D_INF; + } +// } + } + + if ((os) && (nb_failure > 0)) { + *os << "\nWARNING - " << SEQ_error[SEQR_REFERENCE_MODEL] << ": " << i + 1 << ", " + << SEQ_error[SEQR_TARGET_MODEL] << ": " << j + 1 << " - " + << SEQ_error[SEQR_DIVERGENCE_NB_FAILURE] << ": " << nb_failure << endl; + } + 
+// if (divergence != -D_INF) { + dist_matrix->update(i + 1 , j + 1 , divergence , cumul_length); +// } + + if (markov[j]->nb_output_process == 1) { + delete seq; + } + } + } + + if (os) { + *os << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 1 << ": " << simul_seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[simul_seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + simul_seq->likelihood_write(cout , nb_model , likelihood , SEQ_label[SEQL_MARKOV_CHAIN]); + } + if (out_file) { + *out_file << SEQ_label[SEQL_MARKOV_CHAIN] << " " << i + 1 << ": " << simul_seq->nb_sequence << " " + << SEQ_label[SEQL_SIMULATED] << " " << SEQ_label[simul_seq->nb_sequence == 1 ? SEQL_SEQUENCE : SEQL_SEQUENCES] << endl; + simul_seq->likelihood_write(*out_file , nb_model , likelihood , SEQ_label[SEQL_MARKOV_CHAIN]); + } + + for (j = 0;j < simul_seq->nb_sequence;j++) { + delete [] likelihood[j]; + } + delete [] likelihood; + + if (markov[i]->nb_output_process == 1) { + delete iseq; + } + delete simul_seq; + } + + if (out_file) { + out_file->close(); + delete out_file; + } + + delete markov; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of Kullback-Leibler divergences between variable-order Markov chains. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of variable-order Markov chains, + * \param[in] markov pointer on VariableOrderMarkov objects, + * \param[in] nb_sequence number of sequences, + * \param[in] length sequence length, + * \param[in] path file path. + * + * \return DistanceMatrix object. 
+ */
+/*--------------------------------------------------------------*/
+
+DistanceMatrix* VariableOrderMarkov::divergence_computation(StatError &error , ostream *os , int nb_model ,
+                                                            const VariableOrderMarkov **markov ,
+                                                            int nb_sequence , int length , const string path) const
+
+{
+  bool valid;
+  int m;
+  FrequencyDistribution *reference , **length_dist;
+  DistanceMatrix *result;
+
+
+  error.init();
+
+  // every argument is checked before giving up so that all the violated constraints are reported
+
+  valid = true;
+
+  if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) {
+    error.update(SEQ_error[SEQR_NB_SEQUENCE]);
+    valid = false;
+  }
+  if (length < 2) {
+    error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]);
+    valid = false;
+  }
+  if (length > MAX_LENGTH) {
+    error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]);
+    valid = false;
+  }
+
+  if (!valid) {
+    return NULL;
+  }
+
+  // length frequency distribution concentrated on the single value "length" (nb_sequence sequences)
+
+  reference = new FrequencyDistribution(length + 1);
+
+  reference->nb_element = nb_sequence;
+  reference->offset = length;
+  reference->max = nb_sequence;
+  reference->mean = length;
+  reference->variance = 0.;
+  reference->frequency[length] = nb_sequence;
+
+  // one copy of this distribution per model
+
+  length_dist = new FrequencyDistribution*[nb_model];
+  length_dist[0] = reference;
+  for (m = 1;m < nb_model;m++) {
+    length_dist[m] = new FrequencyDistribution(*reference);
+  }
+
+  // the divergences are computed by the overload taking one length distribution per model
+
+  result = divergence_computation(error , os , nb_model , markov , length_dist , path);
+
+  for (m = 0;m < nb_model;m++) {
+    delete length_dist[m];
+  }
+  delete [] length_dist;
+
+  return result;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Computation of Kullback-Leibler divergences between variable-order Markov chains.
+ * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the matrix of pairwise distances between models, + * \param[in] nb_model number of variable-order Markov chains, + * \param[in] markov pointer on VariableOrderMarkov objects, + * \param[in] nb_sequence number of generated sequences, + * \param[in] seq pointer on MarkovianSequences objects, + * \param[in] path file path. + * + * \return DistanceMatrix object. + */ +/*--------------------------------------------------------------*/ + +DistanceMatrix* VariableOrderMarkov::divergence_computation(StatError &error , ostream *os , int nb_model , + const VariableOrderMarkov **markov , + int nb_sequence , const MarkovianSequences **seq , + const string path) const + +{ + int i; + FrequencyDistribution **length_distribution; + DistanceMatrix *dist_matrix; + + + error.init(); + + if ((nb_sequence < 1) || (nb_sequence > NB_SEQUENCE)) { + dist_matrix = NULL; + error.update(SEQ_error[SEQR_NB_SEQUENCE]); + } + + else { + length_distribution = new FrequencyDistribution*[nb_model]; + for (i = 0;i < nb_model;i++) { + length_distribution[i] = seq[i]->length_distribution->frequency_scale(nb_sequence); + } + + dist_matrix = divergence_computation(error , os , nb_model , markov , length_distribution , path); + + for (i = 0;i < nb_model;i++) { + delete length_distribution[i]; + } + delete [] length_distribution; + } + + return dist_matrix; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Constructor of the VariableOrderMarkovIterator class. + * + * \param[in] imarkov pointer on a VariableOrderMarkov object. 
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkovIterator::VariableOrderMarkovIterator(VariableOrderMarkov *imarkov)
+
+{
+  // no memory selected yet: the first simulation has to be run with the initialization flag
+  memory = I_DEFAULT;
+
+  // registration of this iterator on the model
+  markov = imarkov;
+  ++(markov->nb_iterator);
+
+  // the cumulative distributions are built only if at least one of them is missing
+  if (!((markov->cumul_initial) && (markov->cumul_transition))) {
+    markov->create_cumul();
+    markov->cumul_computation();
+  }
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Copy of a VariableOrderMarkovIterator object.
+ *
+ *  \param[in] iter reference on a VariableOrderMarkovIterator object.
+ */
+/*--------------------------------------------------------------*/
+
+void VariableOrderMarkovIterator::copy(const VariableOrderMarkovIterator &iter)
+
+{
+  memory = iter.memory;
+
+  // the model is shared with the source iterator, its iterator counter is incremented
+  markov = iter.markov;
+  ++(markov->nb_iterator);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Destructor of the VariableOrderMarkovIterator class.
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkovIterator::~VariableOrderMarkovIterator()
+
+{
+  // the model is not deleted, only its iterator counter is decremented
+  --(markov->nb_iterator);
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Assignment operator of the VariableOrderMarkovIterator class.
+ *
+ *  \param[in] iter reference on a VariableOrderMarkovIterator object.
+ *
+ *  \return         VariableOrderMarkovIterator object.
+ */
+/*--------------------------------------------------------------*/
+
+VariableOrderMarkovIterator& VariableOrderMarkovIterator::operator=(const VariableOrderMarkovIterator &iter)
+
+{
+  // self-assignment leaves the iterator counter untouched
+  if (&iter == this) {
+    return *this;
+  }
+
+  // release of the current model before copying the source iterator
+  --(markov->nb_iterator);
+  copy(iter);
+
+  return *this;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Simulation using a variable-order Markov chain.
+ *
+ *  \param[in] int_seq        sequence,
+ *  \param[in] length         sequence length,
+ *  \param[in] initialization flag initialization.
+ *
+ *  \return                   error status.
+ */ +/*--------------------------------------------------------------*/ + +bool VariableOrderMarkovIterator::simulation(int **int_seq , int length , bool initialization) + +{ + bool status; + + + if ((memory == I_DEFAULT) && (!initialization)) { + status = false; + } + + else { + int i , j; + int offset = 0 , *pstate , **pioutput; +// double **proutput; + + + status = true; + + if (markov->nb_output_process > 0) { + pioutput = new int*[markov->nb_output_process]; +// proutput = new double*[markov->nb_output_process]; + } + + pstate = int_seq[0]; + for (i = 0;i < markov->nb_output_process;i++) { +/* switch (type[i + 1]) { + case INT_VALUE : */ + pioutput[i] = int_seq[i + 1]; +/* break; + case REAL_VALUE : + proutput[i] = real_seq[i + 1]; + break; + } */ + } + + if (initialization) { + switch (markov->type) { + case ORDINARY : + *pstate = cumul_method(markov->nb_state , markov->cumul_initial); + memory = markov->child[0][*pstate]; + break; + case EQUILIBRIUM : + memory = cumul_method(markov->nb_row , markov->cumul_initial); + *pstate = markov->state[memory][0]; + break; + } + + for (i = 0;i < markov->nb_output_process;i++) { + if (markov->categorical_process[i]) { + *pioutput[i]++ = markov->categorical_process[i]->observation[*pstate]->simulation(); + } + else if (markov->discrete_parametric_process[i]) { + *pioutput[i]++ = markov->discrete_parametric_process[i]->observation[*pstate]->simulation(); + } + else { +// *proutput[i]++ = markov->continuous_parametric_process[i]->observation[*pstate]->simulation(); + } + } + + pstate++; + offset++; + } + + for (i = offset;i < length;i++) { + *pstate = cumul_method(markov->nb_state , markov->cumul_transition[memory]); + + for (j = 0;j < markov->nb_output_process;j++) { + if (markov->categorical_process[j]) { + *pioutput[j]++ = markov->categorical_process[j]->observation[*pstate]->simulation(); + } + else if (markov->categorical_process[j]) { + *pioutput[j]++ = 
markov->discrete_parametric_process[j]->observation[*pstate]->simulation(); + } + else { +// *proutput[j]++ = markov->continuous_parametric_process[j]->observation[*pstate]->simulation(); + } + } + + memory = markov->next[memory][*pstate++]; + } + + if (markov->nb_output_process > 0) { + delete [] pioutput; +// delete [] proutput; + } + } + + return status; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Simulation using a variable-order Markov chain. + * + * \param[in] length sequence length, + * \param[in] initialization flag initialization. + * + * \return generated sequence. + */ +/*--------------------------------------------------------------*/ + +int** VariableOrderMarkovIterator::simulation(int length , bool initialization) + +{ + int i; + int **int_seq; + + + if ((memory == I_DEFAULT) && (!initialization)) { + int_seq = NULL; + } + + else { + int_seq = new int*[markov->nb_output_process + 1]; + for (i = 0;i <= markov->nb_output_process;i++) { + int_seq[i] = new int[length]; + } + + simulation(int_seq , length , initialization); + } + + return int_seq; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Correction of the log-likelihood of a variable-order Markov chain for sequences. + * + * \param[in] seq reference on a VariableOrderMarkovData object. + * + * \return log-likelihood. 
+ */
+/*--------------------------------------------------------------*/
+
+double VariableOrderMarkov::likelihood_correction(const VariableOrderMarkovData &seq) const
+
+{
+  int state , index;
+  double sum = 0.;
+
+
+  // initial states: observed initial count times log initial probability,
+  // the states that never start a sequence are skipped
+
+  for (state = 0;state < nb_state;state++) {
+    if (seq.chain_data->initial[state] <= 0) {
+      continue;
+    }
+
+    sum += seq.chain_data->initial[state] * log(initial[state]);
+  }
+
+  // first position of each sequence: log probability, under the first categorical
+  // observation process, of the first value of variable 1 given the first value of variable 0
+
+  if (nb_output_process > 0) {
+    for (index = 0;index < seq.nb_sequence;index++) {
+      const int first_state = seq.int_sequence[index][0][0];
+      const int first_output = seq.int_sequence[index][1][0];
+
+      sum += log(categorical_process[0]->observation[first_state]->mass[first_output]);
+    }
+  }
+
+  return sum;
+}
+
+
+/*--------------------------------------------------------------*/
+/**
+ *  \brief Estimation of a lumped Markov chain.
+ *
+ *  \param[in] error         reference on a StatError object,
+ *  \param[in] os            stream for displaying the results of the estimation,
+ *  \param[in] category      transcoding table,
+ *  \param[in] criterion     model selection criterion (AIC(c)/BIC),
+ *  \param[in] order         Markov chain order,
+ *  \param[in] counting_flag flag on the computation of the counting distributions.
+ *
+ *  \return                  VariableOrderMarkov object.
+ */ +/*--------------------------------------------------------------*/ + +VariableOrderMarkov* MarkovianSequences::lumpability_estimation(StatError &error , ostream *os , int *category , + model_selection_criterion criterion , + int order , bool counting_flag) const + +{ + bool status = true , *presence; + int i; + int max_category , nb_state[2] , nb_parameter[2]; + double penalty , max_likelihood , likelihood[2] , penalized_likelihood[2]; + VariableOrderMarkov *markov , *lumped_markov; + MarkovianSequences *seq; + + + markov = NULL; + error.init(); + + if (nb_variable > 1) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , 1); + } + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + max_category = 0; + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if ((category[i] < 0) || (category[i] >= marginal_distribution[0]->nb_value - 1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_CATEGORY] << " " << category[i] << " " + << STAT_error[STATR_NOT_ALLOWED]; + error.update((error_message.str()).c_str()); + } + else if (category[i] > max_category) { + max_category = category[i]; + } + } + + if (max_category == 0) { + status = false; + error.update(STAT_error[STATR_NB_CATEGORY]); + } + + if (status) { + presence = new bool[max_category + 1]; + for (i = 0;i <= max_category;i++) { + presence[i] = false; + } + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + presence[category[i]] = true; + } + + for (i = 0;i <= max_category;i++) { + if (!presence[i]) { + status = false; + ostringstream error_message; + error_message << STAT_error[STATR_MISSING_CATEGORY] << " " << i; + error.update((error_message.str()).c_str()); + } + } + + 
delete [] presence; + } + } + + if ((order < 1) || (order > ORDER)) { + status = false; + error.update(SEQ_error[SEQR_ORDER]); + } + + if (status) { +// markov = variable_order_markov_estimation(error , type , order , true , false); + markov = variable_order_markov_estimation(error , ORDINARY , order , true , false); + + if (markov) { + + // computation of the compensation term + + switch (criterion) { + case AIC : + penalty = 1.; + break; + case BIC : + penalty = 0.5 * log((double)cumul_length); + break; + } + + nb_state[1] = markov->nb_state; + nb_parameter[1] = markov->nb_parameter_computation(); + likelihood[1] = markov->markov_data->likelihood - + markov->likelihood_correction(*(markov->markov_data)); + + if (criterion == AICc) { + if (nb_parameter[1] < cumul_length - 1) { + penalized_likelihood[1] = likelihood[1] - (double)(nb_parameter[1] * cumul_length) / + (double)(cumul_length - nb_parameter[1] - 1); + } + else { + penalized_likelihood[1] = D_INF; + } + } + + else { + penalized_likelihood[1] = likelihood[1] - nb_parameter[1] * penalty; + } + + max_likelihood = penalized_likelihood[1]; + + seq = transcode(error , 1 , category , true); + +// lumped_markov = seq->variable_order_markov_estimation(error , type , order , true , false); + lumped_markov = seq->variable_order_markov_estimation(error , ORDINARY , order , true , false); + + if (lumped_markov) { + if (os) { + int j , k; + int nb_output , sum , lumped_nb_parameter , *pstate , *poutput , ***observation_data; + double lumped_likelihood , lumped_penalized_likelihood; + + + // 2nd lumpability property (two-state- i.e. 
transition-dependent observation probabilities) + + observation_data = new int**[seq->marginal_distribution[0]->nb_value]; + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + observation_data[i] = new int*[seq->marginal_distribution[1]->nb_value]; + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + observation_data[i][j] = new int[seq->marginal_distribution[1]->nb_value]; + } + for (j = 0;j < seq->marginal_distribution[0]->nb_value;j++) { + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + observation_data[i][j][k] = 0; + } + } + } + + // accumulation of the observation frequencies + + for (i = 0;i < seq->nb_sequence;i++) { + pstate = seq->int_sequence[i][0] + 1; + poutput = seq->int_sequence[i][1] + 1; + for (j = 1;j < seq->length[i];j++) { + observation_data[*(pstate - 1)][*pstate][*poutput]++; + pstate++; + poutput++; + } + } + + // estimation of the observation probabilities, computation of the log-likelihood and + // of the number of free parameters + + lumped_nb_parameter = lumped_markov->nb_parameter_computation() - + lumped_markov->categorical_process[0]->nb_parameter_computation(0.); + lumped_likelihood = lumped_markov->likelihood_computation(*(lumped_markov->markov_data->chain_data)); + + if (criterion == AICc) { + if (lumped_nb_parameter < cumul_length - 1) { + lumped_penalized_likelihood = lumped_likelihood - (double)(lumped_nb_parameter * cumul_length) / + (double)(cumul_length - lumped_nb_parameter - 1); + } + else { + lumped_penalized_likelihood = D_INF; + } + } + + else { + lumped_penalized_likelihood = lumped_likelihood - lumped_nb_parameter * penalty; + } + + *os << "\n" << lumped_markov->nb_state << " " << STAT_label[STATL_STATES] + << " 2 * " << SEQ_label[SEQL_MARKOV_CHAIN] << " " << STAT_label[STATL_LIKELIHOOD] << ": " + << 2 * lumped_likelihood << " " << lumped_nb_parameter << " " + << STAT_label[lumped_nb_parameter == 1 ? 
STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" + << STAT_criterion_word[criterion] << "): " << 2 * lumped_penalized_likelihood << endl; + + *os << "\n" << STAT_word[STATW_OBSERVATION_PROBABILITIES] << endl; + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[0]->nb_value;j++) { + nb_output = 0; + sum = 0; + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + nb_output++; + sum += observation_data[i][j][k]; + } + } + + if (nb_output > 1) { + *os << i << " -> " << j << " : "; + + lumped_nb_parameter += (nb_output - 1); + + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + *os << k << " (" << (double)observation_data[i][j][k] / (double)sum << ") | "; + + lumped_likelihood += observation_data[i][j][k] * log((double)observation_data[i][j][k] / (double)sum); + } + } + + *os << endl; + } + } + } + + if (criterion == AICc) { + if (lumped_nb_parameter < cumul_length - 1) { + lumped_penalized_likelihood = lumped_likelihood - (double)(lumped_nb_parameter * cumul_length) / + (double)(cumul_length - lumped_nb_parameter - 1); + } + else { + lumped_penalized_likelihood = D_INF; + } + } + + else { + lumped_penalized_likelihood = lumped_likelihood - lumped_nb_parameter * penalty; + } + + *os << "\n" << lumped_markov->nb_state << " " << STAT_label[STATL_STATES] + << " 2 * " << STAT_label[STATL_LIKELIHOOD] << ": " << 2 * lumped_likelihood << " " + << lumped_nb_parameter << " " << STAT_label[lumped_nb_parameter == 1 ? 
STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" + << STAT_criterion_word[criterion] << "): " << 2 * lumped_penalized_likelihood << endl; + + // 3rd lumpability property (output-state-dependent observation probabilities) + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + observation_data[i][j][k] = 0; + } + } + } + + // accumulation of the observation frequencies + + for (i = 0;i < seq->nb_sequence;i++) { + pstate = seq->int_sequence[i][0] + 1; + poutput = seq->int_sequence[i][1] + 1; + for (j = 1;j < seq->length[i];j++) { + observation_data[*pstate][*(poutput - 1)][*poutput]++; + pstate++; + poutput++; + } + } + + // estimation of the observation probabilities, computation of the log-likelihood and + // of the number of free parameters + + lumped_nb_parameter = lumped_markov->nb_parameter_computation() - + lumped_markov->categorical_process[0]->nb_parameter_computation(0.); + lumped_likelihood = lumped_markov->likelihood_computation(*(lumped_markov->markov_data->chain_data)); + + *os << "\n" << STAT_word[STATW_OBSERVATION_PROBABILITIES] << endl; + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + nb_output = 0; + sum = 0; + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + nb_output++; + sum += observation_data[i][j][k]; + } + } + + if (nb_output > 1) { + *os << j << ", " << i << " : "; + + lumped_nb_parameter += (nb_output - 1); + + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + *os << k << " (" << (double)observation_data[i][j][k] / (double)sum << ") | "; + + lumped_likelihood += observation_data[i][j][k] * log((double)observation_data[i][j][k] / (double)sum); + } + } + + *os 
<< endl; + } + } + } + + if (criterion == AICc) { + if (lumped_nb_parameter < cumul_length - 1) { + lumped_penalized_likelihood = lumped_likelihood - (double)(lumped_nb_parameter * cumul_length) / + (double)(cumul_length - lumped_nb_parameter - 1); + } + else { + lumped_penalized_likelihood = D_INF; + } + } + + else { + lumped_penalized_likelihood = lumped_likelihood - lumped_nb_parameter * penalty; + } + + *os << "\n" << lumped_markov->nb_state << " " << STAT_label[STATL_STATES] + << " 2 * " << STAT_label[STATL_LIKELIHOOD] << ": " << 2 * lumped_likelihood << " " + << lumped_nb_parameter << " " << STAT_label[lumped_nb_parameter == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" + << STAT_criterion_word[criterion] << "): " << 2 * lumped_penalized_likelihood << endl; + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + delete [] observation_data[i][j]; + } + delete [] observation_data[i]; + } + delete [] observation_data; + } + + nb_state[0] = lumped_markov->nb_state; + nb_parameter[0] = lumped_markov->nb_parameter_computation(); + likelihood[0] = lumped_markov->markov_data->likelihood - + lumped_markov->likelihood_correction(*(lumped_markov->markov_data)); + + if (criterion == AICc) { + if (nb_parameter[0] < cumul_length - 1) { + penalized_likelihood[0] = likelihood[0] - (double)(nb_parameter[0] * cumul_length) / + (double)(cumul_length - nb_parameter[0] - 1); + } + else { + penalized_likelihood[0] = D_INF; + } + } + + else { + penalized_likelihood[0] = likelihood[0] - nb_parameter[0] * penalty; + } + +# ifdef DEBUG + // if (penalized_likelihood[0] > max_likelihood) { + // markov->ascii_write(os, seq); + // } + // else { + // lumped_markov->ascii_write(os, seq); + //} +# endif + + if (penalized_likelihood[0] > max_likelihood) { + max_likelihood = penalized_likelihood[0]; + delete markov; + markov = lumped_markov; + } + else 
{ + delete lumped_markov; + } + +# ifdef DEBUG + lumpability_test(error , *os, category , order); +# endif + + if (os) { +/* double norm = 0. , weight[2]; + + for (i = 0;i < 2;i++) { + weight[i] = exp(penalized_likelihood[i] - max_likelihood); + norm += weight[i]; + } */ + + for (i = 0;i < 2;i++) { + *os << "\n" << nb_state[i] << " " << STAT_label[STATL_STATES] + << " 2 * " << STAT_label[STATL_LIKELIHOOD] << ": " << 2 * likelihood[i] << " " + << nb_parameter[i] << " " << STAT_label[nb_parameter[i] == 1 ? STATL_FREE_PARAMETER : STATL_FREE_PARAMETERS] + << " 2 * " << STAT_label[STATL_PENALIZED_LIKELIHOOD] << " (" + << STAT_criterion_word[criterion] << "): " << 2 * penalized_likelihood[i] << endl; +// << " " << STAT_label[STATL_WEIGHT] << ": " << weight[i] / norm << endl; + } + } + } + + delete seq; + } + + // computation of the characteristic distributions of the model + + markov->component_computation(); + markov->characteristic_computation(*(markov->markov_data) , counting_flag , I_DEFAULT , false); + } + + return markov; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Test of state lumpability for a Markov chain. + * + * \param[in] error reference on a StatError object, + * \param[in] os stream for displaying the test results, + * \param[in] category transcoding table, + * \param[in] order Markov chain order. + * + * \return error status. 
+ */ +/*--------------------------------------------------------------*/ + +bool MarkovianSequences::lumpability_test(StatError &error , ostream &os , + int *category , int order) const + +{ + bool status = true , *presence; + int i , j , k; + int max_category , df , sum , nb_output , lumped_nb_parameter , *ftransition , *pstate , + *poutput , ***observation_data;; + double value , var1 , var2 , lumped_likelihood , ***observation_proba; + Test *test; + VariableOrderMarkov *markov , *lumped_markov; + MarkovianSequences *seq; + + + error.init(); + + if (nb_variable > 1) { + status = false; + error.correction_update(STAT_error[STATR_NB_VARIABLE] , 1); + } + + if ((type[0] != INT_VALUE) && (type[0] != STATE)) { + status = false; + ostringstream correction_message; + correction_message << STAT_variable_word[INT_VALUE] << " or " << STAT_variable_word[STATE]; + error.correction_update(STAT_error[STATR_VARIABLE_TYPE] , (correction_message.str()).c_str()); + } + + else { + if ((marginal_distribution[0]->nb_value < 2) || + (marginal_distribution[0]->nb_value > NB_STATE)) { + status = false; + error.update(SEQ_error[SEQR_NB_STATE]); + } + + else if (!characteristics[0]) { + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if (marginal_distribution[0]->frequency[i] == 0) { + status = false; + ostringstream error_message; + error_message << SEQ_error[SEQR_MISSING_STATE] << " " << i; + error.update((error_message.str()).c_str()); + } + } + } + + max_category = 0; + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + if ((category[i] < 0) || (category[i] >= marginal_distribution[0]->nb_value - 1)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_CATEGORY] << " " << category[i] << " " + << STAT_error[STATR_NOT_ALLOWED]; + error.update((error_message.str()).c_str()); + } + else if (category[i] > max_category) { + max_category = category[i]; + } + } + + if (max_category == 0) { + status = false; + 
error.update(STAT_error[STATR_NB_CATEGORY]); + } + + if (status) { + presence = new bool[max_category + 1]; + for (i = 0;i <= max_category;i++) { + presence[i] = false; + } + + for (i = 0;i < marginal_distribution[0]->nb_value;i++) { + presence[category[i]] = true; + } + + for (i = 0;i <= max_category;i++) { + if (!presence[i]) { + status = false; + ostringstream error_message; + error_message << STAT_error[STATR_MISSING_CATEGORY] << " " << i; + error.update((error_message.str()).c_str()); + } + } + + delete [] presence; + } + } + + if ((order < 1) || (order > ORDER)) { + status = false; + error.update(SEQ_error[SEQR_ORDER]); + } + + if (status) { +// markov = variable_order_markov_estimation(error , type , order , true , false); + markov = variable_order_markov_estimation(error , ORDINARY , order , true , false); + + seq = transcode(error , 1 , category , true); + +// lumped_markov = seq->variable_order_markov_estimation(error , type , order , true , false); + lumped_markov = seq->variable_order_markov_estimation(error , ORDINARY , order , true , false); + + df = markov->nb_parameter_computation() - lumped_markov->nb_parameter_computation(); + + value = 0.; + + for (i = 1;i < markov->nb_row;i++) { + if (markov->memo_type[i] == TERMINAL) { + ftransition = markov->markov_data->chain_data->transition[i]; + sum = 0; + for (j = 0;j < markov->nb_state;j++) { + sum += *ftransition++; + } + + if (sum > 0) { + for (j = 1;j < lumped_markov->nb_row;j++) { + if ((lumped_markov->memo_type[j] == TERMINAL) && + (lumped_markov->order[j] == markov->order[i])) { + for (k = 0;k < lumped_markov->order[j];k++) { + if (lumped_markov->state[j][k] != category[markov->state[i][k]]) { + break; + } + } + + if (k == lumped_markov->order[j]) { + ftransition = markov->markov_data->chain_data->transition[i]; + for (k = 0;k < markov->nb_state;k++) { + var1 = (double)sum * lumped_markov->categorical_process[0]->observation[category[k]]->mass[k] * + lumped_markov->transition[j][category[k]]; + if 
(var1 > 0.) { + var2 = *ftransition - var1; + value += var2 * var2 / var1; + } + ftransition++; + } + break; + } + } + } + } + } + } + + test = new Test(CHI2 , true , df , I_DEFAULT , value); + + test->chi2_critical_probability_computation(); + + os << *test; + + delete test; + + value = 2 * (markov->markov_data->likelihood - markov->likelihood_correction(*(markov->markov_data)) - + (lumped_markov->markov_data->likelihood - lumped_markov->likelihood_correction(*(lumped_markov->markov_data)))); + + test = new Test(CHI2 , true , df , I_DEFAULT , value); + + test->chi2_critical_probability_computation(); + + os << "\n" << SEQ_label[SEQL_LIKELIHOOD_RATIO_TEST] << "\n" << *test; + + delete test; + + // 2eme lumpability property (two-state- i.e. transition-dependent observation probabilities) + + observation_data = new int**[seq->marginal_distribution[0]->nb_value]; + observation_proba = new double**[seq->marginal_distribution[0]->nb_value]; + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + observation_data[i] = new int*[seq->marginal_distribution[1]->nb_value]; + observation_proba[i] = new double*[seq->marginal_distribution[1]->nb_value]; + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + observation_data[i][j] = new int[seq->marginal_distribution[1]->nb_value]; + observation_proba[i][j] = new double[seq->marginal_distribution[1]->nb_value]; + } + for (j = 0;j < seq->marginal_distribution[0]->nb_value;j++) { + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + observation_data[i][j][k] = 0; + } + } + } + + // accumulation of the observation frequencies + + for (i = 0;i < seq->nb_sequence;i++) { + pstate = seq->int_sequence[i][0] + 1; + poutput = seq->int_sequence[i][1] + 1; + for (j = 1;j < seq->length[i];j++) { + observation_data[*(pstate - 1)][*pstate][*poutput]++; + pstate++; + poutput++; + } + } + + // estimation of the observation probabilities, computation of the log-likelihood and + // of the number of free parameters + 
+ lumped_nb_parameter = lumped_markov->nb_parameter_computation() - + lumped_markov->categorical_process[0]->nb_parameter_computation(0.); + lumped_likelihood = lumped_markov->likelihood_computation(*(lumped_markov->markov_data->chain_data)); + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[0]->nb_value;j++) { + nb_output = 0; + sum = 0; + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + nb_output++; + sum += observation_data[i][j][k]; + } + } + + if (nb_output > 1) { + lumped_nb_parameter += (nb_output - 1); + + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + lumped_likelihood += observation_data[i][j][k] * log((double)observation_data[i][j][k] / (double)sum); + } + } + } + + if (sum > 0) { + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + observation_proba[i][j][k] = (double)observation_data[i][j][k] / (double)sum; + } + } + } + } + + df = markov->nb_parameter_computation() - lumped_nb_parameter; + + value = 0.; + + for (i = 1;i < markov->nb_row;i++) { + if (markov->memo_type[i] == TERMINAL) { + ftransition = markov->markov_data->chain_data->transition[i]; + sum = 0; + for (j = 0;j < markov->nb_state;j++) { + sum += *ftransition++; + } + + if (sum > 0) { + for (j = 1;j < lumped_markov->nb_row;j++) { + if ((lumped_markov->memo_type[j] == TERMINAL) && + (lumped_markov->order[j] == markov->order[i])) { + for (k = 0;k < lumped_markov->order[j];k++) { + if (lumped_markov->state[j][k] != category[markov->state[i][k]]) { + break; + } + } + + if (k == lumped_markov->order[j]) { + ftransition = markov->markov_data->chain_data->transition[i]; + for (k = 0;k < markov->nb_state;k++) { + var1 = (double)sum * observation_proba[category[markov->state[i][0]]][category[k]][k] * + lumped_markov->transition[j][category[k]]; + if (var1 > 0.) 
{ + var2 = *ftransition - var1; + value += var2 * var2 / var1; + } + ftransition++; + } + break; + } + } + } + } + } + } + + test = new Test(CHI2 , true , df , I_DEFAULT , value); + + test->chi2_critical_probability_computation(); + + os << "\n" << *test; + + delete test; + + value = 2 * (markov->markov_data->likelihood - markov->likelihood_correction(*(markov->markov_data)) - lumped_likelihood); + + test = new Test(CHI2 , true , df , I_DEFAULT , value); + + test->chi2_critical_probability_computation(); + + os << "\n" << SEQ_label[SEQL_LIKELIHOOD_RATIO_TEST] << "\n" << *test; + + delete test; + + // 3rd lumpability property (output-state-dependent observation probabilities) + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + observation_data[i][j][k] = 0; + } + } + } + + // accumulation of the observation frequencies + + for (i = 0;i < seq->nb_sequence;i++) { + pstate = seq->int_sequence[i][0] + 1; + poutput = seq->int_sequence[i][1] + 1; + for (j = 1;j < seq->length[i];j++) { + observation_data[*pstate][*(poutput - 1)][*poutput]++; + pstate++; + poutput++; + } + } + + // estimation of the observation probabilities, computation of the log-likelihood and + // of the number of free parameters + + lumped_nb_parameter = lumped_markov->nb_parameter_computation() - + lumped_markov->categorical_process[0]->nb_parameter_computation(0.); + lumped_likelihood = lumped_markov->likelihood_computation(*(lumped_markov->markov_data->chain_data)); + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + nb_output = 0; + sum = 0; + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + nb_output++; + sum += observation_data[i][j][k]; + } + } + + if (nb_output > 1) { + lumped_nb_parameter += (nb_output - 1); + + for (k = 
0;k < seq->marginal_distribution[1]->nb_value;k++) { + if (observation_data[i][j][k] > 0) { + lumped_likelihood += observation_data[i][j][k] * log((double)observation_data[i][j][k] / (double)sum); + } + } + } + + if (sum > 0) { + for (k = 0;k < seq->marginal_distribution[1]->nb_value;k++) { + observation_proba[i][j][k] = (double)observation_data[i][j][k] / (double)sum; + } + } + } + } + + df = markov->nb_parameter_computation() - lumped_nb_parameter; + + value = 0.; + + for (i = 1;i < markov->nb_row;i++) { + if (markov->memo_type[i] == TERMINAL) { + ftransition = markov->markov_data->chain_data->transition[i]; + sum = 0; + for (j = 0;j < markov->nb_state;j++) { + sum += *ftransition++; + } + + if (sum > 0) { + for (j = 1;j < lumped_markov->nb_row;j++) { + if ((lumped_markov->memo_type[j] == TERMINAL) && + (lumped_markov->order[j] == markov->order[i])) { + for (k = 0;k < lumped_markov->order[j];k++) { + if (lumped_markov->state[j][k] != category[markov->state[i][k]]) { + break; + } + } + + if (k == lumped_markov->order[j]) { + ftransition = markov->markov_data->chain_data->transition[i]; + for (k = 0;k < markov->nb_state;k++) { + var1 = (double)sum * observation_proba[category[k]][markov->state[i][0]][k] * + lumped_markov->transition[j][category[k]]; + if (var1 > 0.) 
{ + var2 = *ftransition - var1; + value += var2 * var2 / var1; + } + ftransition++; + } + break; + } + } + } + } + } + } + + test = new Test(CHI2 , true , df , I_DEFAULT , value); + + test->chi2_critical_probability_computation(); + + os << "\n" << *test; + + delete test; + + value = 2 * (markov->markov_data->likelihood - markov->likelihood_correction(*(markov->markov_data)) - lumped_likelihood); + + test = new Test(CHI2 , true , df , I_DEFAULT , value); + + test->chi2_critical_probability_computation(); + + os << "\n" << SEQ_label[SEQL_LIKELIHOOD_RATIO_TEST] << "\n" << *test; + + delete test; + + for (i = 0;i < seq->marginal_distribution[0]->nb_value;i++) { + for (j = 0;j < seq->marginal_distribution[1]->nb_value;j++) { + delete [] observation_data[i][j]; + delete [] observation_proba[i][j]; + } + delete [] observation_data[i]; + delete [] observation_proba[i]; + } + delete [] observation_data; + delete [] observation_proba; + + delete markov; + delete seq; + delete lumped_markov; + } + + return status; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/vomc_distributions1.cpp b/src/cpp/sequence_analysis/vomc_distributions1.cpp new file mode 100644 index 0000000..8458a3e --- /dev/null +++ b/src/cpp/sequence_analysis/vomc_distributions1.cpp @@ -0,0 +1,1958 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the 
License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include + +#include "variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the state probabilities as a function of + * the index parameter for a variable-order Markov chain. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::index_state_distribution() + +{ + int i , j , k; + double *memory , *previous_memory; + Curves *index_state; + + + index_state = state_process->index_value; + + // initialization of the probabilities of the memories and the states + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + index_state->point[state[i][0]][0] = initial[state[i][0]]; + memory[i] = initial[state[i][0]]; + } + else { + memory[i] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 0;i < nb_state;i++) { + index_state->point[i][0] = 0.; + } + + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + index_state->point[state[i][0]][0] += initial[i]; + memory[i] = initial[i]; + } + else { + memory[i] = 0.; + } + } + break; + } + } + + // computation of the state probabilities as a function of the index parameter + + for (i = 1;i < index_state->length;i++) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + for (k = 0;k < nb_memory[j];k++) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + + // computation of the state probabilities + + for (j = 0;j < nb_state;j++) { + index_state->point[j][i] = 0.; + } + for (j = 1;j < nb_row;j++) { + index_state->point[state[j][0]][i] += memory[j]; + } + } + + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probabilities of the memories for an ordinary variable-order Markov chain + * taking account of the sequence length distribution. + * + * \return memory probabilities. 
+ */ +/*--------------------------------------------------------------*/ + +double* VariableOrderMarkovChain::memory_computation() const + +{ + int i , j , k; + double *average_memory , *memory , *previous_memory; + + + average_memory = new double[nb_row]; + for (i = 1;i < nb_row;i++) { + average_memory[i] = 0.; + } + + // initialization of the probabilities of the memories + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { +// average_memory[i] += initial[state[i][0]]; + memory[i] = initial[state[i][0]]; + } + else { + memory[i] = 0.; + } + } + + // computation of the probabilities of the memories as a function of the index parameter + + for (i = 1;i < state_process->length->nb_value - 2;i++) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + for (k = 0;k < nb_memory[j];k++) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + + // accumulation of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + average_memory[j] += memory[j] * (1. - state_process->length->cumul[i]); + } + } + + delete [] memory; + delete [] previous_memory; + + return average_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probability of not visiting a state + * for an ordinary variable-order Markov chain. + * + * \param[in] istate state, + * \param[in] increment threshold on the sum of the probabilities of the memories. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::state_no_occurrence_probability(int istate , double increment) + +{ + int i; + + for (i = 0;i < nb_state;i++) { + if ((i != istate) && (!accessibility[i][istate])) { + break; + } + } + + if (i < nb_state) { + int j , k; + double memory_sum , *memory , *previous_memory , + &no_occurrence = state_process->no_occurrence[istate]; + + + // initialization of the probabilities of the memories + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + memory_sum = 0.; + no_occurrence = 0.; + + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + if (state[i][0] != istate) { + if (accessibility[state[i][0]][istate]) { + memory[i] = initial[state[i][0]]; + memory_sum += memory[i]; + } + else { + memory[i] = 0.; + no_occurrence += initial[state[i][0]]; + } + } + + else { + memory[i] = 0.; + } + } + + else { + memory[i] = 0.; + } + } + + i = 1; + + while ((memory_sum > increment) || (i < (nb_state - 1) * max_order)) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and update of + // the probability of not visiting the selected state + + memory_sum = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + for (k = 1;k < MIN(i , order[j]);k++) { + if ((state[j][k] == istate) || (!accessibility[state[j][k]][istate])) { + break; + } + } + + if ((k == MIN(i , order[j])) && (state[j][0] != istate)) { + if (accessibility[state[j][0]][istate]) { + for (k = 0;k < nb_memory[j];k++) { + if (state[previous[j][k]][0] != istate) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + + memory_sum += memory[j]; + } + + else { + for (k = 0;k < nb_memory[j];k++) { + if (state[previous[j][k]][0] != istate) { + no_occurrence += transition[previous[j][k]][state[j][0]] * 
previous_memory[previous[j][k]]; + } + } + } + } + } + + i++; + } + + delete [] memory; + delete [] previous_memory; + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the time to the 1st occurrence of + * a state for a variable-order Markov chain. + * + * \param[in] istate state, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::state_first_occurrence_distribution(int istate , int min_nb_value , + double cumul_threshold) + +{ + int i , j , k; + double *memory , *previous_memory , *pmass , *pcumul; + Distribution *first_occurrence; + + + first_occurrence = state_process->first_occurrence[istate]; + first_occurrence->complement = state_process->no_occurrence[istate]; + + pmass = first_occurrence->mass; + pcumul = first_occurrence->cumul; + + // initialization of the probabilities of the memories + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + if (state[i][0] != istate) { + memory[i] = initial[state[i][0]]; + } + else { + memory[i] = 0.; + *pmass = initial[state[i][0]]; + } + } + + else { + memory[i] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + *pmass = 0.; + + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + if (state[i][0] != istate) { + memory[i] = initial[i]; + } + else { + memory[i] = 0.; + *pmass += initial[i]; + } + } + + else { + memory[i] = 0.; + } + } + break; + } + } + + *pcumul = *pmass; + + i = 1; + + while (((*pcumul < cumul_threshold - first_occurrence->complement) || (i < min_nb_value)) && + (i < first_occurrence->alloc_nb_value)) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + 
+ // computation of the probabilities of the memories and the current probability mass + + *++pmass = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + for (k = 1;k < MIN(i , order[j]);k++) { + if (state[j][k] == istate) { + break; + } + } + + if (k == MIN(i , order[j])) { + if (state[j][0] != istate) { + for (k = 0;k < nb_memory[j];k++) { + if (state[previous[j][k]][0] != istate) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + } + + else { + for (k = 0;k < nb_memory[j];k++) { + if (state[previous[j][k]][0] != istate) { + *pmass += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + } + } + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + + first_occurrence->nb_value = i; + +# ifdef DEBUG + if (first_occurrence->complement > 0.) { + cout << "\n" << SEQ_label[SEQL_NO_OCCURRENCE] << " " << istate << " : " + << first_occurrence->complement << " | " + << 1. - first_occurrence->cumul[first_occurrence->nb_value - 1] << endl; + } +# endif + + first_occurrence->offset_computation(); + first_occurrence->max_computation(); + first_occurrence->mean_computation(); + first_occurrence->variance_computation(); + + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probability of leaving definitively a state + * for an ordinary variable-order Markov chain. + * + * \param[in] imemory memory distribution, + * \param[in] istate state, + * \param[in] increment threshold on the sum of the probabilities of the memories. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::state_leave_probability(const double *imemory , int istate , + double increment) + +{ + if (stype[istate] == TRANSIENT) { + int i , j , k; + double memory_sum , *memory , *previous_memory , + &leave = state_process->leave[istate]; + + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + memory_sum = 0.; + + for (i = 1;i < nb_row;i++) { + if (state[i][0] == istate) { + memory[i] = imemory[i]; + if (order[i] == 1) { + memory[i] += initial[state[i][0]]; + } + memory_sum += memory[i]; + } + + else { + memory[i] = 0.; + } + } + + for (i = 1;i < nb_row;i++) { + if (state[i][0] == istate) { + memory[i] /= memory_sum; + } + } + + leave = 0.; + i = 1; + + do { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and update of + // the probability of leaving definitively the selected state + + memory_sum = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + for (k = 1;k < MIN(i , order[j]);k++) { + if ((state[j][k] == istate) || (!accessibility[state[j][k]][istate])) { + break; + } + } + + if ((((k == i) && (i < order[j]) && (state[j][k] == istate)) || + (k == order[j])) && (state[j][0] != istate)) { + if (accessibility[state[j][0]][istate]) { + for (k = 0;k < nb_memory[j];k++) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + memory_sum += memory[j]; + } + + else { + for (k = 0;k < nb_memory[j];k++) { + leave += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + } + } + + i++; + } + while ((memory_sum > increment) || (i < (nb_state - 1) * max_order)); + + delete [] memory; + delete [] previous_memory; + } +} + + +/*--------------------------------------------------------------*/ 
+/** + * \brief Computation of the distribution of the recurrence time in a state + * for a variable-order Markov chain. + * + * \param[in] imemory memory distribution, + * \param[in] istate state, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::state_recurrence_time_distribution(const double *imemory , int istate , + int min_nb_value , double cumul_threshold) + +{ + int i , j , k; + double sum , *memory , *previous_memory , *pmass , *pcumul; + Distribution *recurrence_time; + + + recurrence_time = state_process->recurrence_time[istate]; + recurrence_time->complement = state_process->leave[istate]; + + pmass = recurrence_time->mass; + pcumul = recurrence_time->cumul; + *pmass = 0.; + *pcumul = 0.; + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + sum = 0.; + + for (i = 1;i < nb_row;i++) { + if (state[i][0] == istate) { + memory[i] = imemory[i]; + if ((type == ORDINARY) && (order[i] == 1)) { + memory[i] += initial[state[i][0]]; + } + sum += memory[i]; + } + + else { + memory[i] = 0.; + } + } + + for (i = 1;i < nb_row;i++) { + if (state[i][0] == istate) { + memory[i] /= sum; + } + } + + i = 1; + + do { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current probability mass + + *++pmass = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + for (k = 1;k < MIN(i , order[j]);k++) { + if (state[j][k] == istate) { + break; + } + } + + if (((k == i) && (i < order[j]) && (state[j][k] == istate)) || (k == order[j])) { + if (state[j][0] != istate) { + for (k = 0;k < nb_memory[j];k++) { + memory[j] += transition[previous[j][k]][state[j][0]] * 
previous_memory[previous[j][k]]; + } + } + + else { + for (k = 0;k < nb_memory[j];k++) { + *pmass += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + } + } + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + while (((*pcumul < cumul_threshold - recurrence_time->complement) || (i < min_nb_value)) && + (i < recurrence_time->alloc_nb_value)); + + recurrence_time->nb_value = i; + recurrence_time->nb_value_computation(); + + if (recurrence_time->nb_value > 0) { + recurrence_time->offset_computation(); + recurrence_time->max_computation(); + recurrence_time->mean_computation(); + recurrence_time->variance_computation(); + } + + else { + delete state_process->recurrence_time[istate]; + state_process->recurrence_time[istate] = NULL; + } + + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the sojourn time in a state + * for a variable-order Markov chain. + * + * \param[in] imemory memory distribution, + * \param[in] istate state, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::state_sojourn_time_distribution(const double *imemory , int istate , + int min_nb_value , double cumul_threshold) + +{ + int i , j , k; + int self_index; + double sum , *memory , *previous_memory , *pmass , *pcumul; + DiscreteParametric *sojourn_time; + + + sojourn_time = state_process->sojourn_time[istate]; + + pmass = sojourn_time->mass; + pcumul = sojourn_time->cumul; + *pmass = 0.; + *pcumul = 0.; + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + sum = 0.; + + for (i = 1;i < nb_row;i++) { + if ((state[i][0] == istate) && ((order[i] == 1) || + ((order[i] > 1) && (state[i][1] != istate)))) { + memory[i] = imemory[i]; + if ((type == ORDINARY) && (order[i] == 1)) { + memory[i] += initial[state[i][0]]; + } + sum += memory[i]; + } + + else { + memory[i] = 0.; + } + } + + for (i = 1;i < nb_row;i++) { + if ((state[i][0] == istate) && ((order[i] == 1) || + ((order[i] > 1) && (state[i][1] != istate)))) { + memory[i] /= sum; + } + } + + // sojourn time < maximum order of the Markov chain + + for (i = 1;i < max_order;i++) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current probability mass + + *++pmass = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + for (k = 0;k <= MIN(i , order[j] - 1);k++) { + if (state[j][k] != istate) { + break; + } + } + + if (((k == i + 1) && (i < order[j] - 1) && (state[j][k] != istate)) || (k == order[j])) { + for (k = 0;k < nb_memory[j];k++) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + *pmass += (1. 
- transition[previous[j][k]][state[j][0]]) * previous_memory[previous[j][k]]; + } + } + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + } + + // computation of the probability masses of the geometric tail + + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + for (k = 0;k < order[j];k++) { + if (state[j][k] != istate) { + break; + } + } + + if (k == order[j]) { + self_index = j; + break; + } + } + } + + while (((*pcumul < cumul_threshold) || (i < min_nb_value)) && + (i < sojourn_time->alloc_nb_value)) { + *++pmass = memory[self_index] * (1. - transition[self_index][istate]); + memory[self_index] *= transition[self_index][istate]; + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + sojourn_time->nb_value = i; + + while (*pmass-- == 0.) { + (sojourn_time->nb_value)--; + } + + sojourn_time->offset_computation(); + sojourn_time->max_computation(); + sojourn_time->mean_computation(); + sojourn_time->variance_computation(); + + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the observation probabilities as a function of + * the index parameter for a hidden variable-order Markov chain. + * + * \param[in] variable observation process index. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::index_output_distribution(int variable) + +{ + int i , j , k; + Curves *index_state , *index_value; + + + index_value = categorical_process[variable]->index_value; + + // computation of the state probabilities + + if (!(state_process->index_value)) { + state_process->index_value = new Curves(nb_state , index_value->length); + index_state_distribution(); + } + index_state = state_process->index_value; + + // incorporation of the observation probabilities + + for (i = 0;i < index_value->length;i++) { + for (j = 0;j < categorical_process[variable]->nb_value;j++) { + index_value->point[j][i] = 0.; + for (k = 0;k < nb_state;k++) { + index_value->point[j][i] += categorical_process[variable]->observation[k]->mass[j] * + index_state->point[k][i]; + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probability of not observing a value for + * for a hidden ordinary variable-order Markov chain. + * + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] increment threshold on the sum of the probabilities of the memories. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_no_occurrence_probability(int variable , int output , + double increment) + +{ + bool status = false , *output_accessibility; + int i , j , k; + double memory_sum , sum , *observation , *memory , *previous_memory , + &no_occurrence = categorical_process[variable]->no_occurrence[output]; + + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + // computation of the accessibility of the selected observation from a given state + + output_accessibility = new bool[nb_state]; + + for (i = 0;i < nb_state;i++) { + output_accessibility[i] = false; + + for (j = 0;j < nb_state;j++) { + if (j == i) { + if (observation[j] > 0.) { + output_accessibility[i] = true; + break; + } + } + + else { + if ((accessibility[i][j]) && (observation[j] > 0.)) { + output_accessibility[i] = true; + break; + } + } + } + + if (!output_accessibility[i]) { + status = true; + } + } + + if (status) { + + // initialization of the probabilities of the memories + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + memory_sum = 0.; + no_occurrence = 0.; + + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + if (output_accessibility[state[i][0]]) { + memory[i] = (1. 
- observation[state[i][0]]) * initial[state[i][0]]; + memory_sum += memory[i]; + } + else { + memory[i] = 0.; + no_occurrence += initial[state[i][0]]; + } + } + + else { + memory[i] = 0.; + } + } + + i = 1; + + while ((memory_sum > increment) || (i < nb_state * max_order)) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and update of + // the probability of not observing the selected observation + + memory_sum = 0.; + + for (j = 1;j < nb_row;j++) { + sum = 0.; + for (k = 0;k < nb_memory[j];k++) { + sum += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + if (output_accessibility[state[j][0]]) { + memory[j] = (1. - observation[state[j][0]]) * sum; + memory_sum += memory[j]; + } + else { + memory[j] = 0.; + no_occurrence += sum; + } + } + + i++; + } + + delete [] memory; + delete [] previous_memory; + } + + delete [] observation; + delete [] output_accessibility; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the time to the 1st occurrence of + * a categorical observation for a hidden variable-order Markov chain. + * + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_first_occurrence_distribution(int variable , int output , + int min_nb_value , + double cumul_threshold) + +{ + int i , j , k; + double sum , *observation , *memory , *previous_memory , *pmass , *pcumul; + Distribution *first_occurrence; + + + first_occurrence = categorical_process[variable]->first_occurrence[output]; + first_occurrence->complement = categorical_process[variable]->no_occurrence[output]; + + pmass = first_occurrence->mass; + pcumul = first_occurrence->cumul; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + // initialization of the probabilities of the memories + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + *pmass = 0.; + + switch (type) { + + case ORDINARY : { + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + memory[i] = (1. - observation[state[i][0]]) * initial[state[i][0]]; + *pmass += observation[state[i][0]] * initial[state[i][0]]; + } + else { + memory[i] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (i = 1;i < nb_row;i++) { + if (!child[i]) { + memory[i] = (1. 
- observation[state[i][0]]) * initial[i]; + *pmass += observation[state[i][0]] * initial[i]; + } + else { + memory[i] = 0.; + } + } + break; + } + } + + *pcumul = *pmass; + + i = 1; + + while (((*pcumul < cumul_threshold - first_occurrence->complement) || (i < min_nb_value)) && + (i < first_occurrence->alloc_nb_value)) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current probability mass + + *++pmass = 0.; + + for (j = 1;j < nb_row;j++) { + sum = 0.; + for (k = 0;k < nb_memory[j];k++) { + sum += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + memory[j] = (1. - observation[state[j][0]]) * sum; + *pmass += observation[state[j][0]] * sum; + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + + first_occurrence->nb_value = i; + + first_occurrence->offset_computation(); + first_occurrence->max_computation(); + first_occurrence->mean_computation(); + first_occurrence->variance_computation(); + + delete [] observation; + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the probability of leaving definitively a categorical observation + * for a hidden ordinary variable-order Markov chain. + * + * \param[in] imemory memory distribution, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] increment threshold on the sum of the probabilities of the memories. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_leave_probability(const double *imemory , int variable , + int output , double increment) + +{ + bool status = false , *output_accessibility; + int i , j , k; + double memory_sum , sum , *observation , *memory , *previous_memory , + &leave = categorical_process[variable]->leave[output]; + + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + // computation of the accessibility of the selected observation from a given state + + output_accessibility = new bool[nb_state]; + + for (i = 0;i < nb_state;i++) { + output_accessibility[i] = false; + + for (j = 0;j < nb_state;j++) { + if (j == i) { + if (observation[j] > 0.) { + output_accessibility[i] = true; + break; + } + } + + else { + if ((accessibility[i][j]) && (observation[j] > 0.)) { + output_accessibility[i] = true; + break; + } + } + } + + if (!output_accessibility[i]) { + status = true; + } + } + + if (status) { + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + memory_sum = 0.; + + for (i = 1;i < nb_row;i++) { + memory[i] = imemory[i]; + if (order[i] == 1) { + memory[i] += initial[state[i][0]]; + } + memory[i] *= observation[state[i][0]]; + + memory_sum += memory[i]; + } + + for (i = 1;i < nb_row;i++) { + memory[i] /= memory_sum; + } + + leave = 0.; + i = 1; + + do { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and update of + // the probability of leaving definitively the selected observation + + memory_sum = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + if (observation[state[j][0]] < 1.) 
{ + sum = 0.; + for (k = 0;k < nb_memory[j];k++) { + sum += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + if (output_accessibility[state[j][0]]) { + memory[j] = (1. - observation[state[j][0]]) * sum; + memory_sum += memory[j]; + } + else { + leave += sum; + } + } + } + + i++; + } + while ((memory_sum > increment) || (i < nb_state * max_order)); + + delete [] memory; + delete [] previous_memory; + } + + delete [] observation; + delete [] output_accessibility; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the recurrence time in a categorical observation + * for a hidden variable-order Markov chain. + * + * \param[in] imemory memory distribution, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_recurrence_time_distribution(const double *imemory , int variable , + int output , int min_nb_value , + double cumul_threshold) + +{ + int i , j , k; + double sum , *observation , *memory , *previous_memory , *pmass , *pcumul; + Distribution *recurrence_time; + + + recurrence_time = categorical_process[variable]->recurrence_time[output]; + recurrence_time->complement = categorical_process[variable]->leave[output]; + + pmass = recurrence_time->mass; + pcumul = recurrence_time->cumul; + *pmass = 0.; + *pcumul = 0.; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + sum = 0.; + + for (i = 1;i < nb_row;i++) { + memory[i] = imemory[i]; + if ((type == 
ORDINARY) && (order[i] == 1)) { + memory[i] += initial[state[i][0]]; + } + memory[i] *= observation[state[i][0]]; + + sum += memory[i]; + } + + for (i = 1;i < nb_row;i++) { + memory[i] /= sum; + } + + i = 1; + + do { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current probability mass + + *++pmass = 0.; + + for (j = 1;j < nb_row;j++) { + sum = 0.; + for (k = 0;k < nb_memory[j];k++) { + sum += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + memory[j] = (1. - observation[state[j][0]]) * sum; + *pmass += observation[state[j][0]] * sum; + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + while (((*pcumul < cumul_threshold - recurrence_time->complement) || (i < min_nb_value)) && + (i < recurrence_time->alloc_nb_value)); + + recurrence_time->nb_value = i; + recurrence_time->nb_value_computation(); + + if (recurrence_time->nb_value > 0) { + recurrence_time->offset_computation(); + recurrence_time->max_computation(); + recurrence_time->mean_computation(); + recurrence_time->variance_computation(); + } + + else { + delete categorical_process[variable]->recurrence_time[output]; + categorical_process[variable]->recurrence_time[output] = NULL; + } + + delete [] observation; + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the distribution of the sojourn time in a categorical observation + * for a hidden variable-order Markov chain. + * + * \param[in] imemory memory distribution, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] min_nb_value minimum number of values, + * \param[in] cumul_threshold threshold on the cumulative distribution function. 
+ */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_sojourn_time_distribution(const double *imemory , int variable , + int output , int min_nb_value , + double cumul_threshold) + +{ + int i , j , k; + double sum , *observation , *memory , *previous_memory , *pmass , *pcumul , + &absorption = categorical_process[variable]->absorption[output]; + DiscreteParametric *sojourn_time; + + + sojourn_time = categorical_process[variable]->sojourn_time[output]; + + pmass = sojourn_time->mass; + pcumul = sojourn_time->cumul; + *pmass = 0.; + *pcumul = 0.; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + sum = 0.; + + for (i = 1;i < nb_row;i++) { + if (order[i] == 1) { + memory[i] = 0.; + for (j = 0;j < nb_memory[i];j++) { + memory[i] += (1. - observation[state[previous[i][j]][0]]) * transition[previous[i][j]][state[i][0]] * + imemory[previous[i][j]]; + } + + if (type == ORDINARY) { + memory[i] += initial[state[i][0]]; + for (j = 0;j < nb_memory[i];j++) { + memory[i] += (1. - observation[state[previous[i][j]][0]]) * transition[previous[i][j]][state[i][0]] * + initial[state[previous[i][j]][0]]; + } + } + + memory[i] *= observation[state[i][0]]; + } + + else { + memory[i] = (1. 
- observation[state[i][1]]) * observation[state[i][0]] * imemory[i]; + } + + sum += memory[i]; + } + + for (i = 1;i < nb_row;i++) { + memory[i] /= sum; + } + + i = 1; + + do { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current probability mass + + absorption = 0.; + *++pmass = 0.; + + for (j = 1;j < nb_row;j++) { + sum = 0.; + for (k = 0;k < nb_memory[j];k++) { + sum += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + if ((stype[state[j][0]] == ABSORBING) && (observation[state[j][0]] == 1.)) { + absorption += sum; + } + + memory[j] = observation[state[j][0]] * sum; + *pmass += (1. - observation[state[j][0]]) * sum; + } + + // update of the cumulative distribution function + + pcumul++; + *pcumul = *(pcumul - 1) + *pmass; + i++; + } + while (((*pcumul < cumul_threshold - absorption) || (i < min_nb_value)) && + (i < sojourn_time->alloc_nb_value)); + + if (*pcumul == 0.) { + absorption = 1.; + delete categorical_process[variable]->sojourn_time[output]; + categorical_process[variable]->sojourn_time[output] = NULL; + } + + else { + sojourn_time->nb_value = i; + sojourn_time->complement = absorption; + +# ifdef DEBUG + if (absorption > 0.) { + cout << "\n" << SEQ_label[SEQL_ABSORPTION] << " " << output << " : " + << absorption << " | " << 1. - sojourn_time->cumul[sojourn_time->nb_value - 1] << endl; + } +# endif + + sojourn_time->offset_computation(); + sojourn_time->max_computation(); + sojourn_time->mean_computation(); + sojourn_time->variance_computation(); + } + + delete [] observation; + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the autocorrelation function for a state + * of a variable-order Markov chain (binarized state process). 
+ * + * \param[in] error reference on a StatError object, + * \param[in] istate state, + * \param[in] max_lag maximum lag, + * \param[in] seq pointer on a MarkovianSequences object (may be null; the observed function is added only if its first variable is of type STATE). + * + * \return Correlation object (null if an error was reported). + */ +/*--------------------------------------------------------------*/ + +Correlation* VariableOrderMarkovChain::state_autocorrelation_computation(StatError &error , + int istate , int max_lag , + const MarkovianSequences *seq) const + +{ + bool status = true; + int i , j , k; + int *category; + double sum , norm , mean , *average_memory , *memory , *previous_memory , *ppoint; + Correlation *correl; + MarkovianSequences *binary_seq; + + + correl = NULL; + error.init(); + +/* if (nb_component > 1) { + status = false; + error.correction_update(STAT_parsing[STATP_CHAIN_STRUCTURE] , STAT_parsing[STATP_IRREDUCIBLE]); + } */ + + if ((istate < 0) || (istate >= nb_state)) { + status = false; + ostringstream error_message; + error_message << STAT_label[STATL_STATE] << " " << istate << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + + else { + for (i = 0;i < nb_component;i++) { + if ((component_nb_state[i] == 1) && (component[i][0] == istate)) { + status = false; + error.update(SEQ_error[SEQR_SINGLE_STATE_COMPONENT]); + break; + } + } + } + + if ((max_lag < max_order) || (max_lag > MAX_LAG)) { + status = false; + error.update(SEQ_error[SEQR_MAX_LAG]); + } + + if (status) { + if ((seq) && (seq->type[0] == STATE)) { + correl = new Correlation(2 , max_lag + 1 , true , PEARSON); + } + else { + correl = new Correlation(1 , max_lag + 1 , false , PEARSON); + } + + i = 0; + if ((seq) && (seq->type[0] == STATE)) { + correl->variable_type[i] = OBSERVED_STATE; + correl->variable1[i++] = istate; + } + correl->variable_type[i] = THEORETICAL_STATE; + correl->variable1[i] = istate; + + switch (type) { + + case ORDINARY : { + average_memory = memory_computation(); + break; + } + + case EQUILIBRIUM : { + average_memory = new
double[nb_row]; + for (i = 1;i < nb_row;i++) { + average_memory[i] = initial[i]; + } + break; + } + } + + ppoint = correl->point[((seq) && (seq->type[0] == STATE)) ? 1 : 0]; + *ppoint = 1.; + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + sum = 0.; + norm = 0.; + + for (i = 1;i < nb_row;i++) { + if (state[i][0] == istate) { + memory[i] = average_memory[i]; + if ((type == ORDINARY) && (order[i] == 1)) { + memory[i] += initial[state[i][0]]; + } + sum += memory[i]; + } + + else { + memory[i] = 0.; + + norm += average_memory[i]; + if ((type == ORDINARY) && (order[i] == 1)) { + norm += initial[state[i][0]]; + } + } + } + + for (i = 1;i < nb_row;i++) { + if (state[i][0] == istate) { + memory[i] /= sum; + } + } + + mean = sum / (sum + norm); + + for (i = 1;i <= max_lag;i++) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current autocorrelation coefficient + + *++ppoint = 0.; + + for (j = 1;j < nb_row;j++) { + memory[j] = 0.; + + if (((i < order[j]) && (state[j][i] == istate)) || (i >= order[j])) { + for (k = 0;k < nb_memory[j];k++) { + memory[j] += transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + if (state[j][0] == istate) { + *ppoint += memory[j]; + } + } + } + + *ppoint = (*ppoint - mean) / (1. 
- mean); + } + + delete [] average_memory; + delete [] memory; + delete [] previous_memory; + + if ((seq) && (seq->type[0] == STATE)) { + category = new int[nb_state]; + for (i = 0;i < nb_state;i++) { + category[i] = 0; + } + category[istate] = 1; + + binary_seq = seq->transcode(error , 1 , category); + binary_seq->correlation_computation(*correl , 0 , 0 , EXACT); + delete [] category; + delete binary_seq; + } + } + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the autocorrelation function for a state + * of a variable-order Markov chain (binarized state process); the markov_data member is forwarded as the sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] istate state, + * \param[in] max_lag maximum lag. + * + * \return Correlation object (result of VariableOrderMarkovChain::state_autocorrelation_computation). + */ +/*--------------------------------------------------------------*/ + +Correlation* VariableOrderMarkov::state_autocorrelation_computation(StatError &error , + int istate , int max_lag) const + +{ + Correlation *correl; + + + correl = VariableOrderMarkovChain::state_autocorrelation_computation(error , istate , max_lag , markov_data); + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the autocorrelation function for a state + * of a variable-order Markov chain (binarized state process); delegates to the markov member with this object as the sequences. + * + * \param[in] error reference on a StatError object, + * \param[in] istate state, + * \param[in] max_lag maximum lag. + * + * \return Correlation object.
+ */ +/*--------------------------------------------------------------*/ + +Correlation* VariableOrderMarkovData::state_autocorrelation_computation(StatError &error , + int istate , int max_lag) const + +{ + Correlation *correl; + + + correl = markov->VariableOrderMarkovChain::state_autocorrelation_computation(error , istate , max_lag , this); + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the autocorrelation function for a categorical observation + * of a hidden variable-order Markov chain (binarized observation process). + * + * \param[in] error reference on a StatError object, + * \param[in] variable observation process index (1-based), + * \param[in] output observation, + * \param[in] max_lag maximum lag, + * \param[in] seq pointer on a VariableOrderMarkovData object (may be null). + * + * \return Correlation object (null if an error was reported). + */ +/*--------------------------------------------------------------*/ + +Correlation* VariableOrderMarkov::output_autocorrelation_computation(StatError &error , int variable , + int output , int max_lag , + const VariableOrderMarkovData *seq) const + +{ + bool status = true; + int i , j , k; + int seq_variable , *category; + double sum , norm , mean , *average_memory , *observation , *memory , + *previous_memory , *ppoint; + Correlation *correl; + MarkovianSequences *binary_seq; + + + correl = NULL; + error.init(); + +/* if (nb_component > 1) { + status = false; + error.correction_update(STAT_parsing[STATP_CHAIN_STRUCTURE] , STAT_parsing[STATP_IRREDUCIBLE]); + } */ + + if (nb_output_process == 0) { + status = false; + error.update(STAT_error[STATR_NB_OUTPUT_PROCESS]); + } + + else { + if ((variable < 1) || (variable > nb_output_process) || (!categorical_process[variable - 1])) { + status = false; + error.update(STAT_error[STATR_OUTPUT_PROCESS_INDEX]); + } + + else { + variable--; + + if ((output < 0) || (output >= categorical_process[variable]->nb_value)) { + status = false; + ostringstream
error_message; + error_message << STAT_label[STATL_OUTPUT] << " " << output << " " + << STAT_error[STATR_NOT_PRESENT]; + error.update((error_message.str()).c_str()); + } + } + } + + if ((max_lag < max_order) || (max_lag > MAX_LAG)) { + status = false; + error.update(SEQ_error[SEQR_MAX_LAG]); + } + + if (status) { + if (seq) { + correl = new Correlation(2 , max_lag + 1 , true , PEARSON); + } + else { + correl = new Correlation(1 , max_lag + 1 , false , PEARSON); + } + + i = 0; + if (seq) { + correl->variable_type[i] = OBSERVED_OUTPUT; + correl->variable1[i++] = output; + } + correl->variable_type[i] = THEORETICAL_OUTPUT; + correl->variable1[i] = output; + + switch (type) { + + case ORDINARY : { + average_memory = memory_computation(); + break; + } + + case EQUILIBRIUM : { + average_memory = new double[nb_row]; + for (i = 1;i < nb_row;i++) { + average_memory[i] = initial[i]; + } + break; + } + } + + ppoint = correl->point[seq ? 1 : 0]; + *ppoint = 1.; + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + memory = new double[nb_row]; + previous_memory = new double[nb_row]; + + // initialization of the probabilities of the memories + + sum = 0.; + norm = 0.; + + for (i = 1;i < nb_row;i++) { + memory[i] = average_memory[i]; + if ((type == ORDINARY) && (order[i] == 1)) { + memory[i] += initial[state[i][0]]; + } + norm += memory[i]; + + memory[i] *= observation[state[i][0]]; + sum += memory[i]; + } + + for (i = 1;i < nb_row;i++) { + memory[i] /= sum; + } + + mean = sum / norm; + + for (i = 1;i <= max_lag;i++) { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + previous_memory[j] = memory[j]; + } + + // computation of the probabilities of the memories and the current autocorrelation coefficient + + *++ppoint = 0.; + + for (j = 1;j < nb_row;j++) { + sum = 0.; + for (k = 0;k < nb_memory[j];k++) { + sum += 
transition[previous[j][k]][state[j][0]] * previous_memory[previous[j][k]]; + } + + memory[j] = sum; + *ppoint += observation[state[j][0]] * sum; + } + + *ppoint = (*ppoint - mean) / (1. - mean); + } + + delete [] average_memory; + delete [] observation; + delete [] memory; + delete [] previous_memory; + + if (seq) { + switch (seq->type[0]) { + case INT_VALUE : + seq_variable = variable - 1; + break; + case STATE : + seq_variable = variable; + break; + } + + category = new int[categorical_process[variable]->nb_value]; + for (i = 0;i < categorical_process[variable]->nb_value;i++) { + category[i] = 0; + } + category[output] = 1; + + binary_seq = seq->transcode(error , seq_variable + 1 , category); + binary_seq->correlation_computation(*correl , seq_variable , seq_variable , EXACT); + delete [] category; + delete binary_seq; + } + } + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the autocorrelation function for a categorical observation + * of a hidden variable-order Markov chain (binarized observation process). + * + * \param[in] error reference on a StatError object, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] max_lag maximum lag. + * + * \return Correlation object. + */ +/*--------------------------------------------------------------*/ + +Correlation* VariableOrderMarkov::output_autocorrelation_computation(StatError &error , + int variable , int output , + int max_lag) const + +{ + Correlation *correl; + + + correl = output_autocorrelation_computation(error , variable , output , max_lag , markov_data); + + return correl; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the autocorrelation function for a categorical observation + * of a hidden variable-order Markov chain (binarized observation process). 
+ * + * \param[in] error reference on a StatError object, + * \param[in] variable observation process index, + * \param[in] output observation, + * \param[in] max_lag maximum lag. + * + * \return Correlation object. + */ +/*--------------------------------------------------------------*/ + +Correlation* VariableOrderMarkovData::output_autocorrelation_computation(StatError &error , + int variable , int output , + int max_lag) const + +{ + Correlation *correl; + + + correl = markov->output_autocorrelation_computation(error , variable , output , max_lag , this); + + return correl; +} + + +}; // namespace sequence_analysis diff --git a/src/cpp/sequence_analysis/vomc_distributions2.cpp b/src/cpp/sequence_analysis/vomc_distributions2.cpp new file mode 100644 index 0000000..96be6a6 --- /dev/null +++ b/src/cpp/sequence_analysis/vomc_distributions2.cpp @@ -0,0 +1,965 @@ +/* -*-c++-*- + * ---------------------------------------------------------------------------- + * + * StructureAnalysis: Identifying patterns in plant architecture and development + * + * Copyright 1995-2018 CIRAD AGAP + * + * File author(s): Yann Guedon (yann.guedon@cirad.fr) + * + * $Source$ + * $Id$ + * + * Forum for StructureAnalysis developers: + * + * ---------------------------------------------------------------------------- + * + * GNU General Public Licence + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS For A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; see the file COPYING. 
If not, + * write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * ---------------------------------------------------------------------------- + */ + + + +#include "variable_order_markov.h" +#include "sequence_label.h" + +using namespace std; +using namespace stat_tool; + + +namespace sequence_analysis { + + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the mixture of the distributions of the number of runs (RUN) or + * occurrences (OCCURRENCE) of a state for a sequence length mixing distribution and + * a variable-order Markov chain. + * + * \param[in] istate state, + * \param[in] pattern count pattern type. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkovChain::state_nb_pattern_mixture(int istate , count_pattern pattern) + +{ + int i , j , k , m; + int max_length , nb_pattern , index_nb_pattern , increment; + double sum , *pmass , *lmass , **memory , **previous_memory; + Distribution *pdist; + + + max_length = state_process->length->nb_value - 1; + + switch (pattern) { + case RUN : + pdist = state_process->nb_run[istate]; + nb_pattern = max_length / 2 + 2; + break; + case OCCURRENCE : + pdist = state_process->nb_occurrence[istate]; + nb_pattern = max_length + 1; + break; + } + + pmass = pdist->mass; + for (i = 0;i < pdist->nb_value;i++) { + *pmass++ = 0.; + } + + memory = new double*[nb_row]; + previous_memory = new double*[nb_row]; + for (i = 1;i < nb_row;i++) { + memory[i] = new double[nb_pattern]; + previous_memory[i] = new double[nb_pattern]; + } + + lmass = state_process->length->mass; + index_nb_pattern = 1; + + for (i = 0;i < max_length;i++) { + + // initialization of the probabilities of the memories + // for a number of runs or occurrences of the selected state + + if (i == 0) { + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + if (state[j][0] 
== istate) { + memory[j][0] = 0.; + memory[j][1] = initial[state[j][0]]; + } + else { + memory[j][0] = initial[state[j][0]]; + memory[j][1] = 0.; + } + } + + else { + memory[j][0] = 0.; + memory[j][1] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + if (state[j][0] == istate) { + memory[j][0] = 0.; + memory[j][1] = initial[j]; + } + else { + memory[j][0] = initial[j]; + memory[j][1] = 0.; + } + } + + else { + memory[j][0] = 0.; + memory[j][1] = 0.; + } + } + break; + } + } + } + + else { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + for (k = 0;k < index_nb_pattern;k++) { + previous_memory[j][k] = memory[j][k]; + memory[j][k] = 0.; + } + memory[j][index_nb_pattern] = 0.; + } + + for (j = 1;j < nb_row;j++) { + + // computation of the probabilities of the memories + // for each number of runs or occurrences of the selected state + + for (k = 0;k < nb_memory[j];k++) { + switch (pattern) { + case RUN : + increment = (((state[j][0] == istate) && + (((order[j] == 1) && (state[previous[j][k]][0] != istate)) || + ((order[j] > 1) && (state[j][1] != istate)))) ? 1 : 0); + break; + case OCCURRENCE : + increment = (state[j][0] == istate ? 1 : 0); + break; + } + + for (m = 0;m < index_nb_pattern;m++) { + memory[j][m + increment] += transition[previous[j][k]][state[j][0]] * + previous_memory[previous[j][k]][m]; + } + } + } + } + + if ((pattern == OCCURRENCE) || (i % 2 == 0)) { + index_nb_pattern++; + } + + // update of the mixture of the distributions of the number of runs or occurrences of the selected state + + if (*++lmass > 0.) 
{ + pmass = pdist->mass; + for (j = 0;j < index_nb_pattern;j++) { + sum = 0.; + for (k = 1;k < nb_row;k++) { + sum += memory[k][j]; + } + *pmass++ += *lmass * sum; + } + } + } + + pdist->nb_value_computation(); + pdist->offset_computation(); + pdist->cumul_computation(); + + pdist->max_computation(); + pdist->mean_computation(); + pdist->variance_computation(); + + for (i = 1;i < nb_row;i++) { + delete [] memory[i]; + delete [] previous_memory[i]; + } + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the mixture of the distributions of the number of runs of + * a categorical observation for a sequence length mixing distribution and + * a hidden variable-order Markov chain. + * + * \param[in] variable observation process index, + * \param[in] output observation. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_nb_run_mixture(int variable , int output) + +{ + int i , j , k , m; + int max_length , nb_pattern , index_nb_pattern; + double sum , *observation , *pmass , *lmass , **memory , **previous_memory; + Distribution *nb_run; + + + nb_run = categorical_process[variable]->nb_run[output]; + + max_length = categorical_process[variable]->length->nb_value - 1; + nb_pattern = max_length / 2 + 2; + + pmass = nb_run->mass; + for (i = 0;i < nb_run->nb_value;i++) { + *pmass++ = 0.; + } + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + memory = new double*[nb_row]; + previous_memory = new double*[nb_row]; + for (i = 1;i < nb_row;i++) { + memory[i] = new double[nb_pattern * 2]; + previous_memory[i] = new double[nb_pattern * 2]; + } + + lmass = categorical_process[variable]->length->mass; + index_nb_pattern = 1; + + for (i = 0;i < max_length;i++) { + + // initialization of the probabilities of the memories + 
// for a number of runs of the selected observation + + if (i == 0) { + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + memory[j][0] = (1. - observation[state[j][0]]) * initial[state[j][0]]; + memory[j][1] = 0.; + memory[j][2] = 0.; + memory[j][3] = observation[state[j][0]] * initial[state[j][0]]; + } + else { + for (k = 0;k < 4;k++) { + memory[j][k] = 0.; + } + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + memory[j][0] = (1. - observation[state[j][0]]) * initial[j]; + memory[j][1] = 0.; + memory[j][2] = 0.; + memory[j][3] = observation[state[j][0]] * initial[j]; + } + else { + for (k = 0;k < 4;k++) { + memory[j][k] = 0.; + } + } + } + break; + } + } + } + + else { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + for (k = 0;k < index_nb_pattern * 2;k++) { + previous_memory[j][k] = memory[j][k]; + memory[j][k] = 0.; + } + for (k = index_nb_pattern * 2;k < (index_nb_pattern + 1) * 2;k++) { + previous_memory[j][k] = 0.; + memory[j][k] = 0.; + } + } + + for (j = 1;j < nb_row;j++) { + + // computation of the probabilities of the memories + // for each number of runs of the selected observation + + for (k = 0;k < nb_memory[j];k++) { + for (m = 0;m <= index_nb_pattern;m++) { + if (m < index_nb_pattern) { + memory[j][m * 2] += (1. - observation[state[j][0]]) * transition[previous[j][k]][state[j][0]] * + (previous_memory[previous[j][k]][m * 2] + previous_memory[previous[j][k]][m * 2 + 1]); + } + if (m > 0) { + memory[j][m * 2 + 1] += observation[state[j][0]] * transition[previous[j][k]][state[j][0]] * + (previous_memory[previous[j][k]][(m - 1) * 2] + previous_memory[previous[j][k]][m * 2 + 1]); + } + } + } + } + } + + if (i % 2 == 0) { + index_nb_pattern++; + } + + // update of the mixture of the distributions of the number of runs of the selected observation + + if (*++lmass > 0.) 
{ + pmass = nb_run->mass; + for (j = 0;j < index_nb_pattern;j++) { + sum = 0.; + for (k = 1;k < nb_row;k++) { + sum += memory[k][j * 2] + memory[k][j * 2 + 1]; + } + *pmass++ += *lmass * sum; + } + } + } + + nb_run->nb_value_computation(); + nb_run->offset_computation(); + nb_run->cumul_computation(); + + nb_run->max_computation(); + nb_run->mean_computation(); + nb_run->variance_computation(); + + delete [] observation; + + for (i = 1;i < nb_row;i++) { + delete [] memory[i]; + delete [] previous_memory[i]; + } + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the mixture of the distributions of the number of occurrences of + * a categorical observation for a sequence length mixing distribution and + * a hidden variable-order Markov chain. + * + * \param[in] variable observation process index, + * \param[in] output observation. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::output_nb_occurrence_mixture(int variable , int output) + +{ + int i , j , k , m; + int max_length , nb_pattern , index_nb_pattern; + double sum , *observation , *pmass , *lmass , **memory , **previous_memory; + Distribution *nb_occurrence; + + + nb_occurrence = categorical_process[variable]->nb_occurrence[output]; + + max_length = categorical_process[variable]->length->nb_value - 1; + nb_pattern = max_length + 1; + + pmass = nb_occurrence->mass; + for (i = 0;i < nb_occurrence->nb_value;i++) { + *pmass++ = 0.; + } + + observation = new double[nb_state]; + for (i = 0;i < nb_state;i++) { + observation[i] = categorical_process[variable]->observation[i]->mass[output]; + } + + memory = new double*[nb_row]; + previous_memory = new double*[nb_row]; + for (i = 1;i < nb_row;i++) { + memory[i] = new double[nb_pattern]; + previous_memory[i] = new double[nb_pattern]; + } + + lmass = categorical_process[variable]->length->mass; + index_nb_pattern = 1; 
+ + for (i = 0;i < max_length;i++) { + + // initialization of the probabilities of the memories + // for a number of occurrences of the selected observation (memory[j][m]: probability of memory j with m occurrences so far) + + if (i == 0) { + switch (type) { + + case ORDINARY : { + for (j = 1;j < nb_row;j++) { + if (order[j] == 1) { + memory[j][0] = (1. - observation[state[j][0]]) * initial[state[j][0]]; + memory[j][1] = observation[state[j][0]] * initial[state[j][0]]; + } + else { + memory[j][0] = 0.; + memory[j][1] = 0.; + } + } + break; + } + + case EQUILIBRIUM : { + for (j = 1;j < nb_row;j++) { + if (!child[j]) { + memory[j][0] = (1. - observation[state[j][0]]) * initial[j]; + memory[j][1] = observation[state[j][0]] * initial[j]; + } + else { + memory[j][0] = 0.; + memory[j][1] = 0.; + } + } + break; + } + } + } + + else { + + // update of the probabilities of the memories + + for (j = 1;j < nb_row;j++) { + for (k = 0;k < index_nb_pattern;k++) { + previous_memory[j][k] = memory[j][k]; + memory[j][k] = 0.; + } + memory[j][index_nb_pattern] = 0.; + } + + for (j = 1;j < nb_row;j++) { + + // computation of the probabilities of the memories + // for each number of occurrences of the selected observation (rows start at 1: memory[0] and previous_memory[0] are never allocated) + + for (k = 0;k < nb_memory[j];k++) { + for (m = 0;m < index_nb_pattern;m++) { + memory[j][m] += (1. - observation[state[j][0]]) * transition[previous[j][k]][state[j][0]] * + previous_memory[previous[j][k]][m]; + memory[j][m + 1] += observation[state[j][0]] * transition[previous[j][k]][state[j][0]] * + previous_memory[previous[j][k]][m]; + } + } + } + } + + index_nb_pattern++; + + // update of the mixture of the distributions of the number of occurrences of the selected observation + + if (*++lmass > 0.)
{ + pmass = nb_occurrence->mass; + for (j = 0;j < index_nb_pattern;j++) { + sum = 0.; + for (k = 1;k < nb_row;k++) { + sum += memory[k][j]; + } + *pmass++ += *lmass * sum; + } + } + } + + nb_occurrence->nb_value_computation(); + nb_occurrence->offset_computation(); + nb_occurrence->cumul_computation(); + + nb_occurrence->max_computation(); + nb_occurrence->mean_computation(); + nb_occurrence->variance_computation(); + + delete [] observation; + + for (i = 1;i < nb_row;i++) { + delete [] memory[i]; + delete [] previous_memory[i]; + } + delete [] memory; + delete [] previous_memory; +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the characteristic distributions of a VariableOrderMarkov object. + * + * \param[in] length sequence length, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] variable observation process index. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::characteristic_computation(int length , bool counting_flag , + int variable) + +{ + if (nb_component > 0) { + bool computation[NB_OUTPUT_PROCESS + 1]; + int i , j , k; + double *memory; + DiscreteParametric dlength(UNIFORM , length , length , D_DEFAULT , D_DEFAULT); + + + memory = NULL; + + // computation of the state intensity and interval distributions + + if (((variable == I_DEFAULT) || (variable == 0)) && + ((!(state_process->length)) || + (dlength != *(state_process->length)))) { + computation[0] = true; + state_process->create_characteristic(dlength , true , counting_flag); + + switch (type) { + + case ORDINARY : { + memory = memory_computation(); + break; + } + + case EQUILIBRIUM : { + memory = new double[nb_row]; + for (i = 1;i < nb_row;i++) { + memory[i] = initial[i]; + } + break; + } + } + + index_state_distribution(); + + for (i = 0;i < nb_state;i++) { + if (type == ORDINARY) { + state_no_occurrence_probability(i); + } + 
state_first_occurrence_distribution(i); + + if (type == ORDINARY) { + state_leave_probability(memory , i); + } + state_recurrence_time_distribution(memory , i); + + if (stype[i] != ABSORBING) { + state_sojourn_time_distribution(memory , i); + } + else { + state_process->absorption[i] = 1.; + delete state_process->sojourn_time[i]; + state_process->sojourn_time[i] = NULL; + } + } + +# ifdef DEBUG + if (type == EQUILIBRIUM) { + double sum = 0.; + + // computation of the stationary distribution in the case of an equilibrium process + // with renormalization for taking account of the thresholds applied on + // the cumulative distribution functions of the recurrence times in states + + cout << "\n" << STAT_label[STATL_STATIONARY_PROBABILITIES] << endl; + for (i = 1;i < nb_row;i++) { + cout << initial[i] << " "; + } + cout << endl; + + for (i = 0;i < nb_state;i++) { + sum += 1. / state_process->recurrence_time[i]->mean; + } + for (i = 0;i < nb_state;i++) { + cout << 1. / (state_process->recurrence_time[i]->mean * sum) << " "; + } + cout << endl; + } +# endif + + } + + else { + computation[0] = false; + } + + // computation of the observation intensity and interval distributions + + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) && ((variable == I_DEFAULT) || (i == variable)) && + ((!(categorical_process[i]->length)) || + (dlength != *(categorical_process[i]->length)))) { + computation[i + 1] = true; + categorical_process[i]->create_characteristic(dlength , true , counting_flag); + + index_output_distribution(i); + + if (!memory) { + switch (type) { + + case ORDINARY : { + memory = memory_computation(); + break; + } + + case EQUILIBRIUM : { + memory = new double[nb_row]; + for (j = 1;j < nb_row;j++) { + memory[j] = initial[j]; + } + break; + } + } + } + + for (j = 0;j < categorical_process[i]->nb_value;j++) { + if (type == ORDINARY) { + output_no_occurrence_probability(i , j); + } + if (categorical_process[i]->no_occurrence[j] < 1. 
- DOUBLE_ERROR) { + output_first_occurrence_distribution(i , j); + } + else { + delete categorical_process[i]->first_occurrence[j]; + categorical_process[i]->first_occurrence[j] = NULL; + categorical_process[i]->leave[j] = 1.; + } + + if ((type == ORDINARY) && (categorical_process[i]->first_occurrence[j])) { + output_leave_probability(memory , i , j); + } + if (categorical_process[i]->leave[j] < 1. - DOUBLE_ERROR) { + output_recurrence_time_distribution(memory , i , j); + } + else { + delete categorical_process[i]->recurrence_time[j]; + categorical_process[i]->recurrence_time[j] = NULL; + } + + for (k = 0;k < nb_state;k++) { + if ((categorical_process[i]->observation[k]->mass[j] > 0.) && + ((stype[k] != ABSORBING) || (categorical_process[i]->observation[k]->mass[j] < 1.))) { + break; + } + } + + if (k < nb_state) { + output_sojourn_time_distribution(memory , i , j); + } + else { + categorical_process[i]->absorption[j] = 1.; + delete categorical_process[i]->sojourn_time[j]; + categorical_process[i]->sojourn_time[j] = NULL; + } + } + } + + else { + computation[i + 1] = false; + } + } + + delete [] memory; + + if (counting_flag) { + + // computation of the state counting distributions + + if (computation[0]) { + for (i = 0;i < nb_state;i++) { + state_nb_pattern_mixture(i , RUN); + state_nb_pattern_mixture(i , OCCURRENCE); + } + } + + // computation of the observation counting distributions + + for (i = 0;i < nb_output_process;i++) { + if (computation[i + 1]) { + for (j = 0;j < categorical_process[i]->nb_value;j++) { + output_nb_run_mixture(i , j); + output_nb_occurrence_mixture(i , j); + } + } + } + } + } +} + + +/*--------------------------------------------------------------*/ +/** + * \brief Computation of the characteristic distributions of a VariableOrderMarkov object. 
+ * + * \param[in] seq reference on a VariableOrderMarkovData object, + * \param[in] counting_flag flag on the computation of the counting distributions, + * \param[in] variable observation process index, + * \param[in] length_flag flag on the sequence length. + */ +/*--------------------------------------------------------------*/ + +void VariableOrderMarkov::characteristic_computation(const VariableOrderMarkovData &seq , + bool counting_flag , int variable , + bool length_flag) + +{ + if (nb_component > 0) { + bool computation[NB_OUTPUT_PROCESS + 1]; + int i , j , k; + int seq_variable; + double *memory; + Distribution dlength(*(seq.length_distribution)); + + + memory = NULL; + + // computation of the state intensity and interval distributions + + if (((variable == I_DEFAULT) || (variable == 0)) && ((!length_flag) || + ((length_flag) && ((!(state_process->length)) || + (dlength != *(state_process->length)))))) { + computation[0] = true; + state_process->create_characteristic(dlength , true , counting_flag); + + switch (type) { + + case ORDINARY : { + memory = memory_computation(); + break; + } + + case EQUILIBRIUM : { + memory = new double[nb_row]; + for (i = 1;i < nb_row;i++) { + memory[i] = initial[i]; + } + break; + } + } + + index_state_distribution(); + + for (i = 0;i < nb_state;i++) { + if (type == ORDINARY) { + state_no_occurrence_probability(i); + } + if (seq.type[0] == STATE) { + state_first_occurrence_distribution(i , ((seq.characteristics[0]) && (i < seq.marginal_distribution[0]->nb_value) && (seq.characteristics[0]->first_occurrence[i]->nb_element > 0) ? 
seq.characteristics[0]->first_occurrence[i]->nb_value : 1)); + } + else { + state_first_occurrence_distribution(i); + } + + if (type == ORDINARY) { + state_leave_probability(memory , i); + } + if (seq.type[0] == STATE) { + state_recurrence_time_distribution(memory , i , ((seq.characteristics[0]) && (i < seq.marginal_distribution[0]->nb_value) && (seq.characteristics[0]->recurrence_time[i]->nb_element > 0) ? seq.characteristics[0]->recurrence_time[i]->nb_value : 1)); + } + else { + state_recurrence_time_distribution(memory , i); + } + + if (stype[i] != ABSORBING) { + if (seq.type[0] == STATE) { + state_sojourn_time_distribution(memory , i , ((seq.characteristics[0]) && (i < seq.marginal_distribution[0]->nb_value) && (seq.characteristics[0]->sojourn_time[i]->nb_element > 0) ? seq.characteristics[0]->sojourn_time[i]->nb_value : 1)); + } + else { + state_sojourn_time_distribution(memory , i); + } + } + + else { + state_process->absorption[i] = 1.; + delete state_process->sojourn_time[i]; + state_process->sojourn_time[i] = NULL; + } + } + +# ifdef DEBUG + if (type == EQUILIBRIUM) { + double sum = 0.; + + // computation of the stationary distribution in the case of an equilibrium process + // with renormalization for taking account of the thresholds applied on + // the cumulative distribution functions of the recurrence times in states + + cout << "\n" << STAT_label[STATL_STATIONARY_PROBABILITIES] << endl; + for (i = 1;i < nb_row;i++) { + cout << initial[i] << " "; + } + cout << endl; + + for (i = 0;i < nb_state;i++) { + sum += 1. / state_process->recurrence_time[i]->mean; + } + for (i = 0;i < nb_state;i++) { + cout << 1. 
/ (state_process->recurrence_time[i]->mean * sum) << " "; + } + cout << endl; + } +# endif + + } + + else { + computation[0] = false; + } + + // computation of the observation intensity and interval distributions + + for (i = 0;i < nb_output_process;i++) { + if ((categorical_process[i]) && ((variable == I_DEFAULT) || (variable == 1)) && + ((!length_flag) || ((length_flag) && ((!(categorical_process[i]->length)) || + (dlength != *(categorical_process[i]->length)))))) { + computation[i + 1] = true; + categorical_process[i]->create_characteristic(dlength , true , counting_flag); + + switch (seq.type[0]) { + case STATE : + seq_variable = i + 1; + break; + default : + seq_variable = i; + break; + } + + index_output_distribution(i); + + if (!memory) { + switch (type) { + + case ORDINARY : { + memory = memory_computation(); + break; + } + + case EQUILIBRIUM : { + memory = new double[nb_row]; + for (j = 1;j < nb_row;j++) { + memory[j] = initial[j]; + } + break; + } + } + } + + for (j = 0;j < categorical_process[i]->nb_value;j++) { + if (type == ORDINARY) { + output_no_occurrence_probability(i , j); + } + if (categorical_process[i]->no_occurrence[j] < 1. - DOUBLE_ERROR) { + output_first_occurrence_distribution(i , j , ((seq.characteristics[seq_variable]) && (j < seq.characteristics[seq_variable]->nb_value) && (seq.characteristics[seq_variable]->first_occurrence[j]->nb_element > 0) ? seq.characteristics[seq_variable]->first_occurrence[j]->nb_value : 1)); + } + else { + delete categorical_process[i]->first_occurrence[j]; + categorical_process[i]->first_occurrence[j] = NULL; + categorical_process[i]->leave[j] = 1.; + } + + if ((type == ORDINARY) && (categorical_process[i]->first_occurrence[j])) { + output_leave_probability(memory , i , j); + } + if (categorical_process[i]->leave[j] < 1. 
- DOUBLE_ERROR) { + output_recurrence_time_distribution(memory , i , j , ((seq.characteristics[seq_variable]) && (j < seq.characteristics[seq_variable]->nb_value) && (seq.characteristics[seq_variable]->recurrence_time[j]->nb_element > 0) ? seq.characteristics[seq_variable]->recurrence_time[j]->nb_value : 1)); + } + else { + delete categorical_process[i]->recurrence_time[j]; + categorical_process[i]->recurrence_time[j] = NULL; + } + + for (k = 0;k < nb_state;k++) { + if ((categorical_process[i]->observation[k]->mass[j] > 0.) && + ((stype[k] != ABSORBING) || (categorical_process[i]->observation[k]->mass[j] < 1.))) { + break; + } + } + + if (k < nb_state) { + output_sojourn_time_distribution(memory , i , j , ((seq.characteristics[seq_variable]) && (j < seq.characteristics[seq_variable]->nb_value) && (seq.characteristics[seq_variable]->sojourn_time[j]->nb_element > 0) ? seq.characteristics[seq_variable]->sojourn_time[j]->nb_value : 1)); + } + else { + categorical_process[i]->absorption[j] = 1.; + delete categorical_process[i]->sojourn_time[j]; + categorical_process[i]->sojourn_time[j] = NULL; + } + } + } + + else { + computation[i + 1] = false; + } + } + + delete [] memory; + + if (counting_flag) { + + // computation of the state counting distributions + + if (computation[0]) { + for (i = 0;i < nb_state;i++) { + state_nb_pattern_mixture(i , RUN); + state_nb_pattern_mixture(i , OCCURRENCE); + } + } + + // computation of the observation counting distributions + + for (i = 0;i < nb_output_process;i++) { + if (computation[i + 1]) { + for (j = 0;j < categorical_process[i]->nb_value;j++) { + output_nb_run_mixture(i , j); + output_nb_occurrence_mixture(i , j); + } + } + } + } + } +} + + +}; // namespace sequence_analysis diff --git a/src/openalea/sequence_analysis/__init__.py b/src/openalea/sequence_analysis/__init__.py index c9a06e5..38482b4 100644 --- a/src/openalea/sequence_analysis/__init__.py +++ b/src/openalea/sequence_analysis/__init__.py @@ -1,18 +1,10 @@ """Sequence 
Analysis init file""" -__revision__ = "$Id$" from openalea.stat_tool import * -#import openalea.stat_tool._stat_tool +# import openalea.stat_tool._stat_tool -from openalea.deploy.shared_data import get_shared_data_path from os.path import join as pj -def get_shared_data(file): - import openalea.sequence_analysis - shared_data_path = get_shared_data_path(openalea.sequence_analysis.__path__) - return pj(shared_data_path, file) - - import openalea.stat_tool.interface as interface @@ -21,13 +13,14 @@ def get_shared_data(file): from .compare import * from .time_events import * + # from top_parameters import * # from tops import * from .sequences import * from .hidden_semi_markov import * from .hidden_variable_order_markov import * from .semi_markov import * -from .data_transform import * +from .data_transform import * from .estimate import * from .nonhomogeneous_markov import * @@ -35,3 +28,28 @@ def get_shared_data(file): from .variable_order_markov import * from .distance_matrix import * from .enums_seq import * + +try: + __version__ = version("openalea.sequence_analysis") +except PackageNotFoundError: + # package is not installed + pass + +#if sys.platform.startswith("win"): +# os.add_dll_directory(str(Path(__file__).parent.parent / "lib")) + + +def get_shared_data(file): + import openalea.sequence_analysis + + sadir = files("openalea.sequence_analysis") + if sadir.is_dir(): + datadir = sadir / "data" + with as_file(datadir / file) as f: + return str(f) + +def get_shared_data_path(): + import openalea.sequence_analysis + + datadir = files("openalea.sequence_analysis") + return datadir/'data' diff --git a/src/openalea/sequence_analysis/compare.py b/src/openalea/sequence_analysis/compare.py index 7ceafdf..f943f21 100644 --- a/src/openalea/sequence_analysis/compare.py +++ b/src/openalea/sequence_analysis/compare.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -#-*- coding: utf-8 -*- +# -*- coding: utf-8 -*- """ .. 
topic:: compare.py summary @@ -12,6 +12,7 @@ :Revision: $Id$ """ + __version__ = "$Id$" @@ -19,22 +20,29 @@ from openalea.stat_tool._stat_tool import I_DEFAULT from openalea.stat_tool._stat_tool import _VectorDistance, _Vectors from openalea.sequence_analysis._sequence_analysis import ( - _SemiMarkov, - _HiddenSemiMarkov, - _VariableOrderMarkov, - _HiddenVariableOrderMarkov + _SemiMarkov, + _HiddenSemiMarkov, + _VariableOrderMarkov, + _HiddenVariableOrderMarkov, ) from openalea.stat_tool.enums import ( - histogram_types, bool_type, - format_type, algorithm_type + histogram_types, + bool_type, + format_type, + algorithm_type, ) from .enums_seq import ( - begin_aligned_map, sequence_alignment_first_arg, output_sequence, - markov_model_comparison_first_arg, indel_cost_map, ms_vomd_smd_nhmd, - markov_model_for_sequences_first_arg, markovian_algorithms, - markov_model_for_sequences_second_arg + begin_aligned_map, + sequence_alignment_first_arg, + output_sequence, + markov_model_comparison_first_arg, + indel_cost_map, + ms_vomd_smd_nhmd, + markov_model_for_sequences_first_arg, + markovian_algorithms, + markov_model_for_sequences_second_arg, ) from openalea.stat_tool import error @@ -46,7 +54,7 @@ def _compare_markovian_sequences(obj, *args, **kargs): .. 
doctest:: :options: +SKIP - + >>> Compare(mc1, length_histo1, mc2, length_histo2,..., FileName="result") >>> Compare(mc1, mc2,..., nb_seq, length, FileName="result") >>> Compare(mc1, seqm1, mc2, seqm2,..., nb_seq, FileName="result") @@ -70,12 +78,12 @@ def _compare_markovian_sequences(obj, *args, **kargs): filename = kargs.get("Filename", None) - from .enums import ms_vomd_smd_list + from .enums_seq import ms_vomd_smd_list + # Type of arg0 is same as type of obj., so we have the following case # >>> Compare(hsmc1, hsmc2, ,..., nb_seq, FileName="result") if type(args[0]) == type(obj): - - #first_list.append(obj) + # first_list.append(obj) for arg in args: if (isinstance(arg, int)) and nb_seq is None: nb_seq = arg @@ -84,11 +92,10 @@ def _compare_markovian_sequences(obj, *args, **kargs): else: first_list.append(arg) - return obj.divergence_computation_length(first_list, nb_seq, - length, filename) + return obj.divergence_computation_length(first_list, nb_seq, length, filename) # Case where second arguments is Markovian and alternates with obj's type elif type(args[0]) in ms_vomd_smd_list: - #first_list.append(obj) + # first_list.append(obj) for arg in args: if (isinstance(arg, int)) and nb_seq is None: @@ -97,34 +104,35 @@ def _compare_markovian_sequences(obj, *args, **kargs): first_list.append(arg) else: second_list.append(arg) - return obj.divergence_computation_sequences(first_list, second_list, - nb_seq, filename) + return obj.divergence_computation_sequences( + first_list, second_list, nb_seq, filename + ) # Case where second arguments is histogram and then # alternates with obj's type - elif (isinstance(arg[0], histogram_types)): + elif isinstance(arg[0], histogram_types): hlength = [] for arg in args: if type(arg) == type(obj): first_list.append(arg) else: hlength.append(arg) - return obj.divergence_computation_histogram(first_list, hlength, - filename) + return obj.divergence_computation_histogram(first_list, hlength, filename) else: raise Exception("case not 
handled. ") - - def _compare_sequences(seq, *args, **kargs): """compare function related to sequences""" - #int indel_cost = ADAPTATIVE , algorithm = AGGLOMERATIVE; - #double indel_factor , transposition_factor = TRANSPOSITION_FACTOR; + # int indel_cost = ADAPTATIVE , algorithm = AGGLOMERATIVE; + # double indel_factor , transposition_factor = TRANSPOSITION_FACTOR; - from openalea.sequence_analysis._sequence_analysis import \ - INDEL_FACTOR_1, INDEL_FACTOR_N, TRANSPOSITION_FACTOR + from openalea.sequence_analysis._sequence_analysis import ( + INDEL_FACTOR_1, + INDEL_FACTOR_N, + TRANSPOSITION_FACTOR, + ) error.CheckType([seq], [sequence_alignment_first_arg]) @@ -133,56 +141,67 @@ def _compare_sequences(seq, *args, **kargs): Begin = error.ParseKargs(kargs, "Begin", "Aligned", begin_aligned_map) End = error.ParseKargs(kargs, "End", "Aligned", begin_aligned_map) FileName = kargs.get("FileName", None) - Format = error.ParseKargs(kargs, "Format", 'ASCII', format_type) - AlignmentFormat = error.ParseKargs(kargs, "AlignmentFormat", 'ASCII', - format_type) + Format = error.ParseKargs(kargs, "Format", "ASCII", format_type) + AlignmentFormat = error.ParseKargs(kargs, "AlignmentFormat", "ASCII", format_type) AlignmentFileName = kargs.get("AlignmentFileName", None) - IndelCost = error.ParseKargs(kargs, "IndelCost", "Adaptative", - indel_cost_map) + IndelCost = error.ParseKargs(kargs, "IndelCost", "Adaptative", indel_cost_map) IndelFactor = kargs.get("IndelFactor", INDEL_FACTOR_1) Transposition = error.ParseKargs(kargs, "Transposition", False, bool_type) - TranspositionFactor = error.ParseKargs(kargs, "TranspositionFactor", - TRANSPOSITION_FACTOR) - Algorithm = error.ParseKargs(kargs, "Algorithm", "Agglomerative", - algorithm_type) + TranspositionFactor = error.ParseKargs( + kargs, "TranspositionFactor", TRANSPOSITION_FACTOR + ) + Algorithm = error.ParseKargs(kargs, "Algorithm", "Agglomerative", algorithm_type) # check all int and float cases - error.CheckType([ref_identifier, 
test_identifier, IndelFactor, - TranspositionFactor, TranspositionFactor], - [int, int, [int, float], [int, float], [int, float]]) - - + error.CheckType( + [ + ref_identifier, + test_identifier, + IndelFactor, + TranspositionFactor, + TranspositionFactor, + ], + [int, int, [int, float], [int, float], [int, float]], + ) # case 2 of AML if len(args) == 1: - if isinstance(args[0], _VectorDistance): + Output = error.ParseKargs( + kargs, "Output", "DistanceMatrix", output_sequence + ) - Output = error.ParseKargs(kargs, "Output", "DistanceMatrix", - output_sequence) - - if Output == 'm': + if Output == "m": if not error.ParseKargs(kargs, IndelFactor): IndelFactor = INDEL_FACTOR_1 - if kargs.get("Algorithm", None): raise ValueError("Algorithm cannot be used in this context") - #todo: othet error case ? - #if ((!transposition_option) && (transposition_factor_option) - #if ((!file_name_option) && (format_option)) - #if ((!alignment_file_name_option) && (alignment_format_option) - - dist_matrix = seq.alignment_vector_distance(args[0], - ref_identifier, test_identifier, Begin, End, - IndelCost, IndelFactor, Transposition, - TranspositionFactor, FileName, Format, - AlignmentFileName, AlignmentFormat) + # todo: othet error case ? 
+ # if ((!transposition_option) && (transposition_factor_option) + # if ((!file_name_option) && (format_option)) + # if ((!alignment_file_name_option) && (alignment_format_option) + + dist_matrix = seq.alignment_vector_distance( + args[0], + ref_identifier, + test_identifier, + Begin, + End, + IndelCost, + IndelFactor, + Transposition, + TranspositionFactor, + FileName, + Format, + AlignmentFileName, + AlignmentFormat, + ) return dist_matrix - elif Output == 's': - #check errors + elif Output == "s": + # check errors # if (ref_sequence_option): - #if (test_sequence_option): + # if (test_sequence_option): # if (transposition_option) # if (transposition_factor_option) # if (format_option) @@ -193,20 +212,26 @@ def _compare_sequences(seq, *args, **kargs): if not error.ParseKargs(kargs, IndelFactor): IndelFactor = INDEL_FACTOR_N + sequence = seq.multiple_alignment( + args[0], Begin, End, IndelCost, IndelFactor, Algorithm, FileName + ) - sequence = seq.multiple_alignment(args[0], Begin, End, - IndelCost, IndelFactor, Algorithm, - FileName) - - if hasattr(seq, 'markovian_sequences'): + if hasattr(seq, "markovian_sequences"): return sequence.markovian_sequences() else: return sequence - else: #case 1 of AML - dist_matrix = seq.alignment(ref_identifier, test_identifier, Begin , - End , FileName , Format , AlignmentFileName, - AlignmentFormat) + else: # case 1 of AML + dist_matrix = seq.alignment( + ref_identifier, + test_identifier, + Begin, + End, + FileName, + Format, + AlignmentFileName, + AlignmentFormat, + ) return dist_matrix @@ -219,8 +244,7 @@ def _compare_markovian_models_for_sequences(obj, *args, **kargs): error.CheckType([obj], [ms_vomd_smd_nhmd]) Filename = kargs.get("Filename", None) - Algorithm = error.ParseKargs(kargs, "Algorithm", 'Forward', - markovian_algorithms) + Algorithm = error.ParseKargs(kargs, "Algorithm", "Forward", markovian_algorithms) markov_list = [] for arg in args: @@ -228,11 +252,11 @@ def _compare_markovian_models_for_sequences(obj, *args, 
**kargs): markov_list.append(arg) if isinstance(args[0], _HiddenVariableOrderMarkov): - return obj.comparison_hidden_variable_order_markov(markov_list, - Algorithm, Filename) + return obj.comparison_hidden_variable_order_markov( + markov_list, Algorithm, Filename + ) if isinstance(args[0], _HiddenSemiMarkov): - return obj.comparison_hidden_semi_markov(markov_list, - Algorithm, Filename) + return obj.comparison_hidden_semi_markov(markov_list, Algorithm, Filename) elif isinstance(args[0], _VariableOrderMarkov): return obj.comparison_variable_order_markov(markov_list, Filename) # obj should be a sequence @@ -253,7 +277,7 @@ def Compare(arg1, *args, **kargs): .. doctest:: :options: +SKIP - + >>> Compare(histo1, histo2,..., type, FileName="result", Format="ASCII") >>> Compare(vec, vector_distance) @@ -441,30 +465,30 @@ def Compare(arg1, *args, **kargs): p1 = arg1 - # COMPARE 1 if type(p1) in histogram_types: return compare_histo(arg1, *args, **kargs) # COMPARE 2 elif isinstance(p1, _Vectors): return compare_vectors(arg1, *args, **kargs) - #COMPARE 3 - elif type(p1) in sequence_alignment_first_arg and len(args)==0: + # COMPARE 3 + elif type(p1) in sequence_alignment_first_arg and len(args) == 0: return _compare_sequences(arg1, *args, **kargs) - #COMPARE3 bis - elif type(p1) in sequence_alignment_first_arg and \ - isinstance(args[0], _VectorDistance): + # COMPARE3 bis + elif type(p1) in sequence_alignment_first_arg and isinstance( + args[0], _VectorDistance + ): return _compare_sequences(arg1, *args, **kargs) - #Compare 4 - elif type(p1) in markov_model_for_sequences_first_arg and \ - type(args[0]) in markov_model_for_sequences_second_arg: + # Compare 4 + elif ( + type(p1) in markov_model_for_sequences_first_arg + and type(args[0]) in markov_model_for_sequences_second_arg + ): return _compare_markovian_models_for_sequences(arg1, *args, **kargs) - #COMPARE 5 + # COMPARE 5 elif type(p1) in markov_model_comparison_first_arg: return _compare_markovian_sequences(arg1, *args, 
**kargs) - - raise Exception("Error in Compare. No case corresponding to your command." - "Check your arguments.") - - + raise Exception( + "Error in Compare. No case corresponding to your command.Check your arguments." + ) diff --git a/src/openalea/sequence_analysis/data/README.txt b/src/openalea/sequence_analysis/data/README.txt new file mode 100644 index 0000000..27c95ee --- /dev/null +++ b/src/openalea/sequence_analysis/data/README.txt @@ -0,0 +1 @@ +The directory "data" contains data samples that are used by the tests and the documentation. diff --git a/src/openalea/sequence_analysis/data/abri13.ren b/src/openalea/sequence_analysis/data/abri13.ren new file mode 100644 index 0000000..a58dbc7 --- /dev/null +++ b/src/openalea/sequence_analysis/data/abri13.ren @@ -0,0 +1,11 @@ +16 0 1 +16 1 7 +16 2 14 +16 3 19 +16 4 27 +16 5 19 +16 6 17 +16 7 10 +16 8 7 +16 9 3 +16 10 1 diff --git a/src/openalea/sequence_analysis/data/abricotier_suivi_11.seq b/src/openalea/sequence_analysis/data/abricotier_suivi_11.seq new file mode 100644 index 0000000..db86597 --- /dev/null +++ b/src/openalea/sequence_analysis/data/abricotier_suivi_11.seq @@ -0,0 +1,271 @@ +INDEX_PARAMETER : TIME # 74 values + +# time histogram - sample size: 883 +# mean: 16.6331 variance: 223.623 standard deviation: 14.954 + +1 VARIABLE + +VARIABLE 1 : INT # 41 values + +# value histogram - sample size: 883 +# mean: 14.3986 variance: 61.3624 standard deviation: 7.83342 + +# sequence length histogram - sample size: 127 +# mean: 6.95276 variance: 8.56918 standard deviation: 2.92732 + +# cumulated length: 883 + +0 1 | 3 2 | 7 6 | 10 10 | 13 12 # | 17 12 | 20 12 # (1) + +0 5 | 3 6 | 7 11 | 10 15 | 13 18 # | 17 18 | 20 18 # (2) + +0 4 | 3 9 | 7 10 | 10 20 # | 13 20 | 17 20 # (3) + +0 4 | 3 6 | 7 10 | 10 13 | 13 14 | 17 14 | 20 15 | 24 18 | 28 20 | 49 22 # | 52 22 # (4) + +0 4 | 3 8 | 7 10 | 10 12 | 13 15 | 17 16 # | 20 16 # (5) + +0 4 | 3 7 | 7 8 | 10 15 | 13 15 | 17 15 | 20 17 | 24 18 # | 28 18 # (6) + +0 1 | 3 6 | 
7 9 | 10 15 | 13 19 | 17 21 # | 20 21 # (7) + +0 0 | 3 5 | 7 8 | 10 14 | 13 15 | 17 15 | 20 18 # | 24 18 # (8) + +0 0 | 3 3 | 7 7 | 10 14 | 13 16 | 17 19 | 20 22 | 24 22 | 28 26 | 49 30 # | 52 30 # (9) + +0 0 | 3 1 | 7 3 | 10 6 | 13 9 | 17 10 | 20 11 # | 24 11 # (10) + +0 2 | 3 6 | 7 8 | 10 11 | 13 14 | 17 17 | 20 19 # | 24 19 # (11) + +0 2 | 3 4 | 7 8 | 10 14 | 13 17 | 17 17 | 20 19 # | 24 19 # (12) + +0 0 | 3 9 | 7 9 | 10 11 # | 13 11 # (13) + +0 0 | 3 3 | 7 7 | 10 12 | 13 15 # | 17 15 # (14) + +0 3 | 3 4 | 7 8 | 10 11 | 13 11 | 17 12 | 20 16 # | 24 16 # (15) + +0 3 | 3 5 | 7 10 | 10 14 | 13 16 # | 17 16 # (16) + +0 1 | 3 2 | 7 2 | 10 8 | 13 11 | 17 13 | 20 14 # | 24 14 # (17) + +0 3 | 3 4 | 7 8 | 10 15 | 13 19 | 17 19 | 20 21 | 24 23 | 28 24 | 49 30 # | 52 30 # (18) + +0 1 | 3 3 | 7 7 | 10 10 | 13 12 | 17 12 | 20 12 | 24 14 | 28 14 | 49 18 | 52 24 # | 56 24 # (19) + +0 7 | 3 8 | 7 10 | 10 16 | 13 17 # | 17 17 # (20) + +0 2 | 3 2 | 7 3 | 10 11 # | 13 11 # (21) + +0 1 | 3 3 | 7 9 | 10 17 | 13 19 | 17 19 | 20 19 | 24 22 | 28 23 | 49 26 | 52 27 # | 56 27 # (22) + +0 3 | 3 5 | 7 10 | 10 12 | 13 13 # | 17 13 # (23) + +0 5 | 3 9 | 7 16 | 10 17 | 13 17 | 17 21 # | 20 21 # (24) + +0 4 | 3 6 | 7 13 | 10 15 | 13 15 | 17 16 | 20 17 # | 24 17 # (25) + +3 0 | 7 4 | 10 5 | 13 9 # | 17 9 # (26) + +3 4 | 7 8 | 10 11 | 13 13 | 17 16 | 20 16 | 24 19 | 28 21 # (27) + +3 8 | 7 14 | 10 18 | 13 18 | 17 19 # | 20 19 # (28) + +3 0 | 7 3 | 10 5 | 13 12 | 17 14 | 20 15 | 24 16 | 28 17 # (29) + +3 7 | 7 17 | 10 17 | 13 18 | 17 19 | 20 20 | 24 21 # | 28 21 # (30) + +3 3 | 7 7 | 10 12 | 13 13 | 17 13 | 20 13 | 24 14 | 28 18 | 49 19 # | 52 19 # (31) + +3 5 | 7 11 | 10 16 | 13 17 | 17 20 | 20 23 | 24 23 | 28 24 # (32) + +3 3 | 7 9 | 10 15 | 13 16 | 17 17 | 20 17 | 24 19 | 28 19 | 49 26 | 52 27 | 56 29 | 59 34 | 63 36 | 66 36 | 70 37 # | 73 37 # (33) + +3 3 | 7 12 | 10 14 | 13 16 | 17 17 | 20 21 # | 24 21 # (34) + +3 3 | 7 7 | 10 7 | 13 13 # | 17 13 # (35) + +3 3 | 7 7 | 10 10 | 13 15 | 17 17 | 
20 19 | 24 19 | 28 21 | 49 23 | 52 24 # | 56 24 # (36) + +3 0 | 7 2 | 10 7 | 13 10 | 17 13 | 20 18 | 24 18 | 28 21 # (37) + +3 2 | 7 10 | 10 11 | 13 14 | 17 14 | 20 17 # | 24 17 # (38) + +3 5 | 7 11 | 10 16 | 13 18 # | 17 18 # (39) + +3 2 | 7 5 | 10 8 # | 13 8 # (40) + +3 0 | 7 4 | 10 7 | 13 12 | 17 13 | 20 18 | 24 20 # | 28 20 # (41) + +3 5 | 7 10 | 10 13 | 13 14 | 17 16 # | 20 16 # (42) + +3 5 | 7 10 | 10 11 | 13 15 | 17 15 | 20 16 | 24 21 | 28 21 | 49 27 # | 52 27 # (43) + +3 3 | 7 7 | 10 12 | 13 12 | 17 14 | 20 14 | 24 15 | 28 15 | 49 23 | 52 23 | 56 24 # | 59 24 # (44) + +3 3 | 7 7 | 10 12 | 13 13 # | 17 13 # (45) + +# 3 0 | 7 0 | 10 0 | +13 1 | 17 5 | 20 8 | 24 13 # | 28 13 # (46) + +3 5 | 7 8 | 10 13 # | 13 13 # (47) + +3 3 | 7 4 | 10 12 | 13 17 | 17 18 | 20 20 | 24 23 | 28 23 | 49 29 | 52 30 # | 56 30 # (48) + +3 4 | 7 11 | 10 13 | 13 15 | 17 15 | 20 15 | 24 16 | 28 18 # (49) + +3 9 | 7 11 | 10 14 | 13 16 # | 17 16 # (50) + +3 8 | 7 14 | 10 21 | 13 21 | 17 22 | 20 22 | 24 23 # | 28 23 # (51) + +3 10 | 7 18 | 10 19 | 13 19 | 17 24 | 20 25 # | 24 25 # (52) + +3 2 | 7 3 | 10 7 | 13 8 | 17 12 | 20 13 | 24 14 # | 28 14 # (53) + +3 6 | 7 14 # | 10 14 # (54) + +3 5 | 7 10 | 10 17 | 13 17 | 17 18 | 20 21 | 24 21 | 28 22 # (55) + +3 4 | 7 9 | 10 12 | 13 16 | 17 17 | 20 19 | 24 20 | 28 20 | 49 28 | 52 29 | 56 30 | 59 32 | 63 34 | 66 37 # | 70 37 # (56) + +3 7 | 7 9 | 10 13 | 13 15 | 17 20 | 20 23 | 24 24 | 28 24 | 49 31 | 52 31 | 56 34 | 59 38 | 63 40 # | 66 40 # (57) + +3 5 | 7 11 | 10 13 | 13 13 | 17 14 | 20 15 # | 24 15 # (58) + +3 5 | 7 9 | 10 11 | 13 16 | 17 16 | 20 18 | 24 20 | 28 20 | 49 28 | 52 29 | 56 31 | 59 32 | 63 33 | 66 37 | 70 39 # (59) + +3 4 | 7 7 | 10 13 # | 13 13 # (60) + +3 4 | 7 7 | 10 12 | 13 18 | 17 18 | 20 18 | 24 21 | 28 23 # (61) + +3 5 | 7 11 | 10 16 | 13 16 | 17 17 # | 20 17 # (62) + +3 7 | 7 12 | 10 13 # | 13 13 # (63) + +3 3 | 7 7 | 10 10 # | 13 10 # (64) + +3 7 | 7 10 | 10 13 | 13 15 | 17 16 # | 20 16 # (65) + +3 5 | 7 12 | 10 15 | 13 
15 | 17 16 # | 20 16 # (66) + +3 3 | 7 5 | 10 14 | 13 15 | 17 15 | 20 17 | 24 20 | 28 21 | 49 28 # | 52 28 # (67) + +3 4 | 7 9 | 10 13 | 13 16 | 17 16 | 20 17 # | 24 17 # (68) + +3 1 | 7 3 | 10 6 # | 13 6 # (69) + +3 5 | 7 12 | 10 16 # | 13 16 # (70) + +3 6 | 7 11 | 10 13 # | 13 13 # (71) + +3 4 | 7 12 | 10 15 # | 13 15 # (72) + +3 6 | 7 16 | 10 16 | 13 17 # | 17 17 # (73) + +3 3 | 7 8 | 10 14 | 13 16 | 17 18 | 20 19 | 24 20 | 28 22 # (74) + +3 7 | 7 13 | 10 17 # | 13 17 # (75) + +3 7 | 7 10 | 10 13 | 13 13 | 17 15 | 20 17 | 24 19 | 28 20 # (76) + +3 4 | 7 7 | 10 11 | 13 12 # | 17 12 # (77) + +3 7 | 7 13 | 10 16 | 13 16 | 17 17 # | 20 17 # (78) + +3 11 | 7 12 | 10 17 | 13 19 | 17 19 | 20 19 | 24 19 | 28 21 | 49 24 | 52 26 # | 56 26 # (79) + +3 6 | 7 12 | 10 15 # | 13 15 # (80) + +3 7 | 7 14 # | 10 14 # (81) + +3 9 | 7 11 | 10 15 | 13 17 # | 17 17 # (82) + +3 3 | 7 7 | 10 12 # | 13 12 # (83) + +3 2 | 7 4 | 10 12 | 13 12 | 17 15 | 20 16 | 24 19 | 28 20 # (84) + +3 5 | 7 9 | 10 14 | 13 15 # | 17 15 # (85) + +3 3 | 7 7 | 10 15 | 13 17 | 17 17 | 20 20 # | 24 20 # (86) + +3 3 | 7 7 | 10 15 | 13 15 | 17 16 | 20 17 # | 24 17 # (87) + +3 6 | 7 9 | 10 14 # | 13 14 # (88) + +3 5 | 7 8 | 10 16 | 13 17 # | 17 17 # (89) + +3 3 | 7 10 | 10 14 | 13 17 | 17 17 | 20 18 # | 24 18 # (90) + +3 3 | 7 9 | 10 11 | 13 15 | 17 18 | 20 20 | 24 21 | 28 23 # (91) + +3 8 | 7 11 | 10 15 | 13 17 # | 17 17 # (92) + +3 7 | 7 10 | 10 15 # | 13 15 # (93) + +3 5 | 7 11 | 10 16 # | 13 16 # (94) + +3 5 | 7 11 | 10 12 | 13 16 | 17 19 | 20 20 | 24 21 | 28 21 | 49 25 # | 52 25 # (95) + +3 4 | 7 13 | 10 16 | 13 17 | 17 22 | 20 22 | 24 22 | 28 22 | 49 29 | 52 31 | 56 33 | 59 36 | 63 37 | 66 38 # | 70 38 # (96) + +3 7 | 7 13 | 10 15 # | 13 15 # (97) + +3 7 | 7 13 | 10 16 # | 13 16 # (98) + +3 5 | 7 12 | 10 13 | 13 17 # | 17 17 # (99) + +3 7 | 7 14 | 10 16 # | 13 16 # (100) + +3 5 | 7 8 | 10 14 # | 13 14 # (101) + +3 4 | 7 9 | 10 11 # | 13 11 # (102) + +3 5 | 7 8 | 10 12 # | 13 12 # (103) + +3 8 | 7 10 | 10 16 
# | 13 16 # (104) + +3 7 | 7 9 | 10 15 # | 13 15 # (105) + +3 8 | 7 12 | 10 15 | 13 17 # | 17 17 # (106) + +3 6 | 7 11 | 10 14 # | 13 14 # (107) + +3 8 | 7 12 | 10 18 | 13 20 | 17 22 # | 20 22 # (108) + +3 2 | 7 7 | 10 8 | 13 9 # | 17 9 # (109) + +3 3 | 7 9 | 10 11 # | 13 11 # (110) + +3 3 | 7 10 | 10 11 | 13 13 | 17 15 | 20 16 | 24 18 | 28 18 | 49 26 | 52 27 # | 56 27 # (111) + +3 4 | 7 8 | 10 14 # | 13 14 # (112) + +3 3 | 7 6 | 10 11 # | 13 11 # (113) + +3 6 | 7 12 | 10 15 | 13 16 | 17 18 # | 20 18 # (114) + +3 5 | 7 10 | 10 15 | 13 16 | 17 19 | 20 20 | 24 20 | 28 20 | 49 29 | 52 29 | 56 30 # | 59 30 # (115) + +3 3 | 7 7 | 10 14 | 13 15 | 17 17 | 20 19 | 24 19 | 28 19 | 49 30 # | 52 30 # (116) + +3 5 | 7 10 | 10 11 | 13 13 # | 17 13 # (117) + +3 2 | 7 4 | 10 6 | 13 9 | 17 10 # | 20 10 # (118) + +3 4 | 7 9 | 10 15 # | 13 15 # (119) + +3 8 | 7 15 | 10 16 | 13 17 | 17 19 | 20 20 | 24 23 | 28 23 | 49 30 | 52 30 | 56 32 | 59 33 # | 63 33 # (120) + +3 6 | 7 12 | 10 13 | 13 14 | 17 18 # | 20 18 # (121) + +3 1 | 7 4 | 10 6 | 13 9 # | 17 9 # (122) + +3 4 | 7 8 | 10 14 | 13 16 | 17 18 | 20 21 | 24 21 | 28 22 | 49 33 | 52 35 | 56 36 | 59 38 | 63 39 # | 66 39 # (123) + +3 0 | 7 2 | 10 5 | 13 7 | 17 9 | 20 13 | 24 13 | 28 15 | 49 21 # | 52 21 # (124) + +3 6 | 7 14 | 10 19 | 13 20 | 17 22 # | 20 22 # (125) + +3 8 | 7 15 | 10 19 | 13 24 # | 17 24 # (126) + +3 9 | 7 14 | 10 15 | 13 23 # | 17 23 # (127) diff --git a/src/openalea/sequence_analysis/data/belren1.hsc b/src/openalea/sequence_analysis/data/belren1.hsc new file mode 100644 index 0000000..a83e13c --- /dev/null +++ b/src/openalea/sequence_analysis/data/belren1.hsc @@ -0,0 +1,89 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.4 0.3 0.3 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 
0.0 1.0 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 6 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.1 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.3 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.1 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/belren1.seq b/src/openalea/sequence_analysis/data/belren1.seq new file mode 100644 index 0000000..dbd8b81 --- /dev/null +++ b/src/openalea/sequence_analysis/data/belren1.seq @@ -0,0 +1,77 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1343 +# mean: 1.30231 variance: 2.66115 standard deviation: 1.6313 + +# | 
value histogram +# 0 716 latent bud +# 1 153 short shoot +# 2 106 long shoot +# 3 88 fruiting shoot +# 4 280 immediate shoot + +# sequence length histogram - size of the sample: 15 +# mean: 89.5333 variance: 19.4095 standard deviation: 4.40562 + +# cumulative length: 1343 + +2 2 2 2 1 1 1 0 0 0 1 1 1 3 3 1 1 0 1 1 1 0 1 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 \ +0 0 0 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 2 0 0 0 0 1 0 0 0 0 0 0 \ +0 0 0 1 0 0 0 0 0 + +1 1 1 0 1 3 0 1 1 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 2 1 2 1 2 2 0 2 1 0 2 1 1 1 1 1 0 1 0 2 0 1 2 0 0 0 0 0 0 \ +0 0 + +0 0 0 0 0 0 0 0 0 1 0 1 2 0 2 2 0 1 1 0 0 0 0 0 0 0 0 0 3 3 0 0 4 4 4 4 4 4 4 0 0 \ +0 4 4 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 1 0 0 2 0 0 0 1 0 0 2 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 + +0 1 2 0 2 1 0 2 0 0 0 3 3 3 3 3 3 3 3 3 1 3 4 3 4 4 4 4 4 4 0 4 4 4 4 4 0 4 4 4 0 \ +4 4 4 4 4 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 0 2 2 0 0 2 2 0 2 0 0 0 0 0 2 \ +0 0 0 0 0 0 0 0 0 0 + +2 2 2 3 3 3 3 3 0 1 0 0 1 1 3 3 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 0 0 4 4 2 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 2 2 2 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 2 2 2 1 1 1 1 1 1 1 3 0 3 1 0 3 0 0 0 3 4 4 4 4 4 0 4 0 4 4 4 4 4 4 4 4 4 4 0 0 \ +4 4 4 4 0 4 4 4 4 4 0 4 0 0 4 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 2 0 2 0 2 2 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 1 3 1 3 0 3 3 1 3 3 3 3 3 0 3 1 3 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 1 2 0 0 0 0 2 0 1 2 0 1 0 0 2 0 1 0 0 2 0 0 1 0 0 0 0 1 0 1 0 0 0 0 \ +0 0 0 0 + +0 0 0 0 1 2 2 0 1 1 2 1 1 0 1 3 1 1 1 3 3 3 3 0 1 1 1 3 1 4 1 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 4 0 0 4 0 4 0 0 2 0 0 0 0 0 0 0 0 2 \ +0 0 2 0 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 0 1 1 1 1 1 1 1 1 1 3 3 1 3 3 3 1 1 3 3 4 4 4 4 4 4 4 4 4 4 4 4 3 0 4 0 0 \ +0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 + +4 4 0 2 2 1 2 0 1 3 0 1 3 0 3 0 3 
0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 0 4 0 4 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 + +0 0 0 0 0 1 0 1 1 0 1 1 1 1 1 1 1 1 1 3 0 3 0 3 4 4 4 4 4 1 4 4 4 4 4 4 1 4 4 4 0 \ +0 4 3 4 0 3 4 4 4 4 1 1 0 0 0 2 0 4 1 4 4 1 0 1 0 0 0 0 0 0 2 2 2 2 2 0 0 0 0 2 0 \ +0 0 0 0 0 0 0 0 0 + +2 2 2 2 2 1 1 1 1 1 1 1 2 3 2 3 1 1 3 2 4 4 1 1 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 0 4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 4 4 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 + +0 0 0 1 1 2 1 1 1 1 1 3 0 2 1 0 3 3 1 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 \ +0 1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 1 1 1 1 1 2 0 1 3 1 1 3 0 3 1 1 1 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 0 0 3 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 4 4 0 0 4 2 2 2 2 0 0 0 0 2 0 0 2 0 2 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 2 1 1 1 0 2 0 0 1 0 1 0 2 3 1 0 3 0 3 4 4 0 4 4 4 4 4 4 4 4 4 0 4 4 0 \ +4 0 0 0 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/cafe_ortho1.seq b/src/openalea/sequence_analysis/data/cafe_ortho1.seq new file mode 100644 index 0000000..3886a07 --- /dev/null +++ b/src/openalea/sequence_analysis/data/cafe_ortho1.seq @@ -0,0 +1,200 @@ +1 VARIABLE + +VARIABLE 1 : INT # (minimum value: 1, maximum value: 8) + +# value histogram - sample size: 5976 +# mean: 4.15914 variance: 2.82672 standard deviation: 1.68129 + +# | value histogram +# 0 0 +# 1 367 +# 2 873 +# 3 984 +# 4 884 +# 5 1279 +# 6 1265 +# 7 323 +# 8 1 + +# sequence length histogram - sample size: 36 +# mean: 166 variance: 144 standard deviation: 12 + +# cumulative length: 5976 + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 
6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (1) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (2) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (3) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 # (4) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (5) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (6) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 
2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (7) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (8) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (9) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (10) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 # (11) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (12) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (13) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 # (14) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (15) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (16) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (17) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (18) + +1 1 1 1 1 1 1 
1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (19) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 # (20) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (21) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (22) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (23) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (24) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 # (25) + +1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (26) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 # (27) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 # (28) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 # (29) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (30) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (31) + +1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (32) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 # (33) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (34) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 
6 6 6 6 6 7 7 # (35) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (36) diff --git a/src/openalea/sequence_analysis/data/cafe_ortho2.seq b/src/openalea/sequence_analysis/data/cafe_ortho2.seq new file mode 100644 index 0000000..b4a812e --- /dev/null +++ b/src/openalea/sequence_analysis/data/cafe_ortho2.seq @@ -0,0 +1,192 @@ +1 VARIABLE + +VARIABLE 1 : INT # (minimum value: 1, maximum value: 8) + +# value histogram - sample size: 5712 +# mean: 4.42367 variance: 3.0826 standard deviation: 1.75573 + +# | value histogram +# 0 0 +# 1 293 +# 2 839 +# 3 677 +# 4 752 +# 5 1260 +# 6 1317 +# 7 536 +# 8 38 + +# sequence length histogram - sample size: 34 +# mean: 168 variance: 0 standard deviation: 0 + +# cumulative length: 5712 + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 # (1) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (2) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 
7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (3) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (4) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (5) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 # (6) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (7) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (8) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 \ +8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 # (9) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (10) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (11) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (12) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (13) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (14) + +1 1 
1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (15) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (16) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (17) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (18) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (19) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (20) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (21) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (22) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (23) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 # (24) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 # (25) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 # (26) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (27) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 # (28) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (29) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (30) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 
7 7 7 7 7 7 7 7 7 7 7 # (31) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (32) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (33) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (34) diff --git a/src/openalea/sequence_analysis/data/cafe_ortho3.seq b/src/openalea/sequence_analysis/data/cafe_ortho3.seq new file mode 100644 index 0000000..1c30bc7 --- /dev/null +++ b/src/openalea/sequence_analysis/data/cafe_ortho3.seq @@ -0,0 +1,150 @@ +1 VARIABLE + +VARIABLE 1 : INT # (minimum value: 1, maximum value: 7) + +# value histogram - sample size: 4313 +# mean: 4.00278 variance: 2.5691 standard deviation: 1.60284 + +# | value histogram +# 0 0 +# 1 300 +# 2 643 +# 3 678 +# 4 806 +# 5 1032 +# 6 718 +# 7 136 + +# sequence length histogram - sample size: 26 +# mean: 165.885 variance: 116.346 standard deviation: 10.7864 + +# cumulative length: 4313 + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 # (1) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (2) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (3) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (4) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 # (5) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 # (6) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (7) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (8) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (9) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (10) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 # (11) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (12) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 
\ +2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 # (13) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 # (14) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (15) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 # (16) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (17) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 
6 6 6 6 6 6 6 6 6 6 # (18) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (19) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (20) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (21) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (22) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (23) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 # (24) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (25) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (26) diff --git a/src/openalea/sequence_analysis/data/cafe_ortho4.seq b/src/openalea/sequence_analysis/data/cafe_ortho4.seq new file mode 100644 index 0000000..1cc21e2 --- /dev/null +++ b/src/openalea/sequence_analysis/data/cafe_ortho4.seq @@ -0,0 +1,149 @@ +1 VARIABLE + +VARIABLE 1 : INT # (minimum value: 1, maximum value: 7) + +# value histogram - sample size: 4257 +# mean: 4.21917 variance: 2.9005 standard deviation: 1.70308 + +# | value histogram +# 0 0 +# 1 261 +# 2 647 +# 3 556 +# 4 665 +# 5 997 +# 6 824 +# 7 307 + +# sequence length histogram - sample size: 26 +# mean: 163.731 variance: 473.885 standard deviation: 21.7689 + +# cumulative length: 4257 + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 # (1) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 \ +3 3 3 3 3 3 
3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (2) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (3) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (4) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (5) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (6) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 
# (7) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (8) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (9) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (10) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (11) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (12) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (13) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (14) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (15) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (16) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (17) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (18) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 
3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (19) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (20) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (21) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (22) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (23) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ 
+7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (24) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 # (25) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (26) diff --git a/src/openalea/sequence_analysis/data/cafe_ortho5.seq b/src/openalea/sequence_analysis/data/cafe_ortho5.seq new file mode 100644 index 0000000..a8f2634 --- /dev/null +++ b/src/openalea/sequence_analysis/data/cafe_ortho5.seq @@ -0,0 +1,192 @@ +1 VARIABLE + +VARIABLE 1 : INT # (minimum value: 1, maximum value: 8) + +# value histogram - sample size: 5712 +# mean: 4.20816 variance: 2.69156 standard deviation: 1.6406 + +# | value histogram +# 0 0 +# 1 325 +# 2 743 +# 3 875 +# 4 1036 +# 5 1296 +# 6 1096 +# 7 327 +# 8 14 + +# sequence length histogram - sample size: 34 +# mean: 168 variance: 0 standard deviation: 0 + +# cumulative length: 5712 + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (1) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (2) + +1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 
4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (3) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 # (4) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (5) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 # (6) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (7) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 
6 6 # (8) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (9) + +1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 # (10) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 # (11) + +1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 # (12) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (13) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 # (14) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (15) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (16) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (17) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (18) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (19) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 
3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (20) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 # (21) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (22) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 # (23) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (24) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 
6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (25) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 # (26) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 # (27) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (28) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 # (29) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (30) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 
4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (31) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (32) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (33) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 # (34) diff --git a/src/openalea/sequence_analysis/data/cafe_ortho6.seq b/src/openalea/sequence_analysis/data/cafe_ortho6.seq new file mode 100644 index 0000000..5523324 --- /dev/null +++ b/src/openalea/sequence_analysis/data/cafe_ortho6.seq @@ -0,0 +1,242 @@ +1 VARIABLE + +VARIABLE 1 : INT # (minimum value: 1, maximum value: 8) + +# value histogram - sample size: 7392 +# mean: 4.42411 variance: 2.92672 standard deviation: 1.71077 + +# | value histogram +# 0 0 +# 1 371 +# 2 955 +# 3 1010 +# 4 980 +# 5 1690 +# 6 1694 +# 7 678 +# 8 14 + +# sequence length histogram - sample size: 44 +# mean: 168 variance: 0 standard deviation: 0 + +# cumulative length: 7392 + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 
4 \ +4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (1) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (2) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (3) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 # (4) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (5) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 
6 6 6 6 6 6 6 6 7 # (6) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (7) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (8) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 # (9) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (10) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 # (11) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 
6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 # (12) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 # (13) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (14) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (15) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (16) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 # (17) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (18) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (19) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (20) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (21) + +1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (22) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 
7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 # (23) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (24) + +1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (25) + +1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (26) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (27) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (28) + +1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 # (29) + +1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (30) + +1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (31) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 # (32) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 # (33) + +1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 # (34) + +1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 
2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 # (35) + +1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (36) + +1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 # (37) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (38) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 # (39) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 
5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 # (40) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 # (41) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 \ +4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 \ +7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 # (42) + +1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (43) + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 \ +5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 \ +6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 # (44) diff --git a/src/openalea/sequence_analysis/data/chene_sessile_15pa.seq b/src/openalea/sequence_analysis/data/chene_sessile_15pa.seq new file mode 100644 index 0000000..ebd14db --- /dev/null +++ b/src/openalea/sequence_analysis/data/chene_sessile_15pa.seq @@ -0,0 +1,117 @@ +######################################################################### +# +# Successive annual shoots of sessile oaks (observed in Meuse, France). 
+# +######################################################################### + + +6 VARIABLES + +VARIABLE 1 : STATE # year of growth (95, 96, 97) + +VARIABLE 2 : STATE # annual shoot length in cm + +VARIABLE 3 : STATE # shoot diameter in tenths of mm + +VARIABLE 4 : STATE # number of growth cycles + +VARIABLE 5 : STATE # number of metamers + +VARIABLE 6 : STATE # number of branches + +# sequence length histogram - sample size: 46 +# mean: 3 variance: 0 standard deviation: 0 + +# cumulative length: 138 + +95 110 219 2 52 14 | 96 17 119 2 24 9 | 97 57 101 2 33 1 # (1) + +95 88 214 2 50 12 | 96 35 127 1 18 8 | 97 77 92 2 40 0 # (2) + +95 59 136 2 35 6 | 96 14 93 1 17 10 | 97 68 86 2 40 2 # (3) + +95 79 180 2 55 7 | 96 26 132 1 18 8 | 97 59 129 2 30 0 # (4) + +95 65 210 2 40 6 | 96 31 123 1 33 11 | 97 69 104 2 36 1 # (5) + +95 35 127 2 28 6 | 96 18 106 2 27 5 | 97 60 103 2 33 1 # (6) + +95 23 145 2 16 8 | 96 24 103 1 9 3 | 97 71 89 2 54 0 # (7) + +95 45 113 2 33 10 | 96 8 85 1 12 2 | 97 35 57 1 22 0 # (8) + +95 83 157 2 35 7 | 96 4 95 1 10 3 | 97 37 68 2 29 0 # (9) + +95 33 109 2 24 6 | 96 12 66 1 17 4 | 97 53 63 2 40 1 # (10) + +95 63 112 2 52 14 | 96 25 60 1 19 8 | 97 6 39 1 12 0 # (11) + +95 55 124 3 27 8 | 96 23 70 1 25 2 | 97 20 64 1 21 0 # (12) + +95 70 125 2 42 14 | 96 26 102 1 17 7 | 97 50 71 2 33 0 # (13) + +95 79 119 2 52 9 | 96 26 73 1 22 6 | 97 9 41 1 13 0 # (14) + +95 69 142 2 33 12 | 96 26 85 1 22 8 | 97 21 51 1 14 0 # (15) + +95 50 146 2 40 8 | 96 22 95 1 22 6 | 97 26 51 1 20 0 # (16) + +95 81 145 2 53 19 | 96 15 61 1 23 3 | 97 19 53 2 21 0 # (17) + +95 74 160 2 45 8 | 96 38 102 1 22 5 | 97 34 68 1 33 0 # (18) + +95 59 177 2 35 1 | 96 29 140 1 22 5 | 97 78 101 2 53 2 # (19) + +95 49 172 2 37 9 | 96 50 144 3 42 9 | 97 62 70 2 50 2 # (20) + +95 14 109 1 16 1 | 96 33 100 2 21 5 | 97 57 79 2 45 0 # (21) + +95 70 142 2 61 12 | 96 26 89 1 29 5 | 97 28 79 1 32 0 # (22) + +95 66 129 2 58 7 | 96 38 90 1 29 5 | 97 24 43 1 21 0 # (23) + +95 19 90 2 22 3 | 96 6 84 
1 8 1 | 97 63 71 2 37 2 # (24) + +95 56 116 2 47 8 | 96 16 76 1 20 5 | 97 21 45 1 19 0 # (25) + +95 73 169 2 35 11 | 96 27 126 1 16 9 | 97 35 93 1 16 0 # (26) + +95 53 141 2 29 17 | 96 33 93 1 20 4 | 97 24 60 1 23 0 # (27) + +95 51 132 3 51 14 | 96 28 117 1 23 4 | 97 90 122 2 53 7 # (28) + +95 58 152 4 50 15 | 96 15 96 1 11 2 | 97 75 86 2 42 2 # (29) + +95 39 138 2 37 9 | 96 35 106 1 22 7 | 97 38 85 1 26 0 # (30) + +95 37 163 2 28 8 | 96 29 116 2 38 6 | 97 58 102 2 44 3 # (31) + +95 70 136 2 54 10 | 96 25 75 1 23 4 | 97 9 50 1 15 0 # (32) + +95 42 135 2 33 6 | 96 23 111 1 17 2 | 97 79 96 2 37 2 # (33) + +95 63 131 2 40 15 | 96 29 79 1 17 4 | 97 23 86 2 20 2 # (34) + +95 34 130 2 27 6 | 96 12 102 1 13 4 | 97 57 85 2 39 1 # (35) + +95 72 176 2 41 5 | 96 30 99 2 31 7 | 97 28 70 1 19 0 # (36) + +95 61 119 2 45 5 | 96 30 116 1 23 7 | 97 40 87 1 38 0 # (37) + +95 95 210 2 61 17 | 96 20 120 1 18 5 | 97 66 99 2 46 0 # (38) + +95 54 150 2 38 9 | 96 36 94 1 23 8 | 97 31 63 2 24 0 # (39) + +95 93 134 3 59 14 | 96 16 90 1 15 2 | 97 38 69 2 29 0 # (40) + +95 72 190 2 51 15 | 96 33 116 2 27 7 | 97 76 85 2 50 1 # (41) + +95 38 95 2 27 12 | 96 11 73 1 11 3 | 97 42 61 2 24 0 # (42) + +95 20 138 2 24 4 | 96 19 113 1 15 3 | 97 42 94 1 33 0 # (43) + +95 65 175 2 63 12 | 96 32 122 1 27 8 | 97 34 81 1 25 0 # (44) + +95 58 145 2 42 9 | 96 19 95 1 18 3 | 97 85 86 3 57 2 # (45) + +95 46 155 2 28 6 | 96 24 107 2 26 9 | 97 31 70 2 21 0 # (46) diff --git a/src/openalea/sequence_analysis/data/compound1.cd b/src/openalea/sequence_analysis/data/compound1.cd new file mode 100644 index 0000000..e05a45e --- /dev/null +++ b/src/openalea/sequence_analysis/data/compound1.cd @@ -0,0 +1,7 @@ +COMPOUND_DISTRIBUTION + +SUM_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 2 PROBABILITY : 0.5 + +ELEMENTARY_DISTRIBUTION +BINOMIAL INF_BOUND : 2 SUP_BOUND : 5 PROBABILITY : 0.5 \ No newline at end of file diff --git a/src/openalea/sequence_analysis/data/dupreziana21.hc 
b/src/openalea/sequence_analysis/data/dupreziana21.hc new file mode 100644 index 0000000..c996fb3 --- /dev/null +++ b/src/openalea/sequence_analysis/data/dupreziana21.hc @@ -0,0 +1,44 @@ +HIDDEN_MARKOV_CHAIN + +3 STATES + +INITIAL_PROBABILITIES +0.8 0.1 0.1 + +TRANSITION_PROBABILITIES # memory +# 0.352885 0.547115 0.1 0 non-terminal +# 0.381192 0.423546 0.195262 0 0 non-terminal +0.496324 0.403676 0.1 0 0 0 # terminal +0.367958 0.397887 0.234155 1 0 0 # terminal +0.1 0.459574 0.440426 2 0 0 # terminal +0.256309 0.643691 0.1 1 0 # terminal +0.680294 0.219706 0.1 2 0 # terminal +# 0.7594 0.1406 0.1 1 non-terminal +0.782464 0.117536 0.1 0 1 # terminal +0.625846 0.274154 0.1 1 1 # terminal +0.72 0.18 0.1 2 1 # terminal +0.8 0.1 0.1 2 # terminal + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.4 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.3 + +# OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.4 +OUTPUT 2 : 0.3 + +# OUTPUT 0 : 0.4 +# OUTPUT 1 : 0.6 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.4 diff --git a/src/openalea/sequence_analysis/data/dupreziana_20a2.seq b/src/openalea/sequence_analysis/data/dupreziana_20a2.seq new file mode 100644 index 0000000..3de6321 --- /dev/null +++ b/src/openalea/sequence_analysis/data/dupreziana_20a2.seq @@ -0,0 +1,112 @@ +# 2 A2 par arbres au niveau 20 EN du sommet des troncs> +# UNE SEULE PHYLLOTAXIE : OPPOSEE DECUSSE + +1 VARIABLE + +VARIABLE 1 : INT # nombre de rameaux portes : 0, 1 ou 2 + + +0 1 0 1 0 1 0 2 0 0 2 0 1 0 0 2 0 0 0 0 1 0 0 2 0 0 1 0 1 0 0 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 0 + +0 1 0 0 2 0 0 1 0 1 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 +0 2 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 + +0 1 0 1 0 1 0 0 1 +0 1 0 0 0 1 0 0 0 0 + +0 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 +0 1 0 1 0 0 0 0 1 0 1 1 0 + +0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 +0 1 0 1 0 1 0 0 2 0 0 1 0 0 0 0 0 0 0 + +0 
1 0 1 0 1 0 0 1 0 0 2 0 1 0 1 0 1 1 0 0 0 0 0 +0 1 0 2 0 1 1 0 0 0 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 + +0 1 0 1 0 1 1 0 1 0 1 0 1 0 1 0 0 1 0 0 0 0 +0 1 0 0 1 1 0 0 + +0 2 0 0 2 0 0 2 0 0 1 0 1 0 0 1 0 1 1 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 0 1 0 1 0 + +0 1 0 1 0 1 1 0 1 0 0 2 0 0 1 0 0 1 0 0 1 0 0 2 0 0 1 0 0 0 0 0 +0 1 0 1 0 1 1 0 1 0 1 2 0 0 1 0 1 0 1 1 0 1 0 0 1 0 0 0 0 + +0 2 0 1 2 0 0 2 0 0 2 0 2 0 0 2 0 0 2 0 0 0 0 0 0 +0 2 0 0 1 0 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 1 0 0 0 1 0 0 +0 1 0 1 0 0 0 0 1 0 0 0 0 + +0 1 0 1 0 1 0 0 1 0 0 0 +0 1 0 1 0 1 0 0 + +0 1 0 1 0 1 0 0 2 0 0 0 0 0 2 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 0 0 1 0 1 0 0 0 0 0 + +0 1 0 1 0 1 0 0 2 0 0 2 0 0 1 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 1 0 0 0 0 + +0 1 0 1 0 1 0 0 2 0 0 2 0 0 1 0 0 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 + +0 1 0 0 0 1 0 0 2 0 0 2 0 0 1 1 0 0 0 +0 1 0 1 0 1 0 1 1 0 1 0 0 1 0 0 2 0 0 0 0 0 + +0 1 0 0 0 1 0 0 2 0 0 2 0 0 1 0 0 2 0 0 1 0 0 0 0 0 +0 1 0 1 0 1 0 0 1 0 0 1 0 0 2 0 0 2 0 0 1 0 0 0 0 0 0 + +0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 + +0 2 0 0 0 1 0 0 2 0 0 1 0 0 2 0 0 2 0 0 0 0 0 0 +0 1 0 1 0 1 0 0 0 0 0 2 0 0 1 0 0 0 0 0 0 0 0 + +0 1 0 1 0 1 0 1 0 1 0 2 0 1 1 0 0 0 0 0 0 +0 1 0 1 0 1 0 1 0 1 0 0 2 0 0 2 0 1 0 1 1 0 0 0 0 0 0 0 + +0 1 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 +0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 0 + +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 2 0 2 0 0 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 2 0 0 2 0 0 0 0 0 0 + +0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 +0 1 0 1 0 1 0 0 1 0 0 2 0 0 0 0 0 0 + +0 1 0 1 0 1 0 0 1 0 +0 1 0 1 0 1 0 0 2 0 0 + +0 1 0 1 0 1 0 0 2 0 0 1 0 1 0 1 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 0 0 + +0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 +0 1 0 1 0 1 0 1 0 1 0 1 0 1 1 1 0 0 0 0 0 + +0 0 0 1 0 1 0 0 0 0 +0 1 0 1 0 1 0 1 0 + +0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 +0 1 0 1 0 1 0 1 0 1 0 1 1 1 0 0 0 0 0 + +0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 +0 0 0 1 0 0 0 0 2 0 0 1 0 0 2 0 0 2 0 
0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 + +0 1 0 0 0 +0 1 0 1 0 1 0 0 0 + +0 1 0 1 0 1 0 1 0 1 1 1 0 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 1 0 0 2 0 0 2 0 0 0 0 0 + +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 2 0 0 2 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 0 0 + +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 0 0 +0 1 0 1 0 1 0 0 2 0 0 2 0 0 2 0 0 0 0 0 + +0 1 0 1 0 1 0 0 2 0 0 2 0 0 0 0 +0 1 0 1 0 1 0 0 1 0 0 2 0 0 0 0 0 + +0 2 0 0 2 0 0 2 0 0 0 0 1 1 0 1 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 +0 1 0 1 0 1 0 1 1 0 1 0 0 2 0 0 1 1 1 0 1 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/dupreziana_40a2.seq b/src/openalea/sequence_analysis/data/dupreziana_40a2.seq new file mode 100644 index 0000000..5501003 --- /dev/null +++ b/src/openalea/sequence_analysis/data/dupreziana_40a2.seq @@ -0,0 +1,319 @@ +# cupressus dupreziana eleves en pepiniere au ruscas , ages de trois ans , mesures en juin 96 +# 2 a2 par arbre au niveau 40 EN du sommet +# 2 phyllotaxies :opposee decussee puis verticille de trois (pas observable sur tout les rameaux) + +2 VARIABLES + +VARIABLE 1 : INT # phyllotaxie par 2 ou par trois 3 +VARIABLE 2 : INT # nombre de rameaux portes ( 0 1 2 ou 3 ) + + +# arbre 54 +# p40a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 52 +# p42a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 
| 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 51 +# p38a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p39a2 +2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 50 +# p41a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 1 +# p39a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p40a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 7 +# p41a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 2 | 2 0 | 2 2 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 2 | 2 1 | 2 1 | 2 2 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 + +# arbre 12 +# p40a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p42a2 +2 0 | 2 
1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 + +# arbre11 +# p39a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 10 +# p41a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p42a2 +2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 + +# arbre18 +# p39a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 1 | 2 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 17 +# p39a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre15 +# p41a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 
0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre14 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 20 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 + +# arbre 21 +# p40a2 +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 + +# arbre22 +# p43a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 
| 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 +# p43a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 + +# arbre23 +# p39a2 +2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 25 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre26 +# p40a2 +2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p42a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 + +# arbre27 +# p42a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 1 | 2 0 | 
2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 + +# arbre 29 +# p40a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 +# p40a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 31 +# p38a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre36 +# p38a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 + +# arbre37 +# p42a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 3 0 | 3 0 + +# arbre38 +# p39a2 
+2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 +# p39a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 0 + +# arbre 40 +# p42a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 +# p42a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 + +# arbre 41 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 + +# arbre 42 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 + +# arbre 43 +# p39a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 
| 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 45 +# p39a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 47 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 48 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 55 +# p39a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 + +# arbre 56 +# p39a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 3 2 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 
3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 57 +# p40a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p41a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 diff --git a/src/openalea/sequence_analysis/data/dupreziana_60a2.seq b/src/openalea/sequence_analysis/data/dupreziana_60a2.seq new file mode 100644 index 0000000..5ba80f7 --- /dev/null +++ b/src/openalea/sequence_analysis/data/dupreziana_60a2.seq @@ -0,0 +1,408 @@ +# cupressus dupreziana agees de trois ans eleves en pepiniere au ruscas +# mesures en juin 96 +# 2 a2 par arbre au niveau 60 EN du sommet du tronc +# 2 phyllotaxies : opposee decussee puis verticillee par trois +# ( le verticille de trois feuilles ne s'observe pas sur tout les a2 ) + +2 VARIABLES + +VARIABLE 1 : INT # phyllotaxie par 2 ou 3 +VARIABLE 2 : INT # nombre de rameaux portes + +# arbre 54 +# P60a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p61a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 
0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 + +# arbre 52 +# p61a2 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p63a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 51 +# p63a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p63a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 50 +# p59a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 + +# arbre 1 +# p60a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 
2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 3 2 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 7 +# p57a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p57a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 + +# arbre12 +# p60a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 +# p61a2 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 + +# arbre 11 +# p62a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 +# p63a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 
| 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 10 +# p61a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 +# p62a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 18 +# p60a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 2 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 + +# arbre 17 +# p59a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 2 | 2 0 | 
2 0 | 2 0 +# p59a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 + +# arbre 15 +# p60a2 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p60a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 + +# arbre 14 +# p60a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 20 +# p60a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 
2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 21 +# p61a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 +# p61a2 +2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 22 +# p61a2 +2 0 | 2 2 | 2 0 | 2 2 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 +# p62a2 +2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 23 +# p59a2 +2 0 | 2 2 | 2 1 | 2 0 | 2 2 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p59a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 
2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 25 +# p60a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 +# p61a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 26 +# p58a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 +# p58a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 27 +# p61a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 
| 2 0 +# p61a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 29 +# p59a2 +2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 31 +# p61a2 +2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 +# p63a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 \ +3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 + +# arbre 36 +# p60a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 +# p62 +2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 1 + +# arbre 37 +# p59a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 
1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p60a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre38 +# p60a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p61a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre40 +# p61a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 +# p62a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 + +# arbre 41 +# p53a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 +# p53a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 + +# arbre 42 +# p60a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 
2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 +# p60a2 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 + +# arbre 43 +# p61a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p61a2 +2 0 | 2 2 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 + +# arbre 45 +# p61a2 +2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 3 0 | 3 0 +# p61a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 2 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 + +# arbre 47 +# p61a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 
0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 +# p61a2 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 48 +# p61a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 +# p61a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 3 0 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 + +# arbre 55 +# p59a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p61a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 56 +# p60a2 +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 
| 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 +# p61a2 +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 3 0 | 3 0 | 3 0 + +#arbre 57 +# p59a2 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p59a2 +2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 diff --git a/src/openalea/sequence_analysis/data/dupreziana_80a2.seq b/src/openalea/sequence_analysis/data/dupreziana_80a2.seq new file mode 100644 index 0000000..c8dc5a0 --- /dev/null +++ b/src/openalea/sequence_analysis/data/dupreziana_80a2.seq @@ -0,0 +1,314 @@ +# cupressus dupreziana eleves en pepiniere au ruscas , ages de trois ans , mesures en juin 96 +# 2 a3 par arbre au niveau 80 EN du sommet +# 2 phyllotaxies :opposee decussee puis verticille de trois (pas observable sur tout les rameaux) + +2 VARIABLES + +VARIABLE 1 : INT # phyllotaxie par 2 ou par 3 +VARIABLE 2 : INT # nombre de rameaux portes + + +# arbre 54 +# P80a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# P82a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 
| 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 80 +# p79a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p79a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 51 +# p81a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p83a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 + +# arbre50 +# p81a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 +# p81a3 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 + +# arbre 1 +# p80a3 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p81a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 
3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 \ +3 0 + +# arbre 7 +# p77a3 +2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p79a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 12 +# p78a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 11 +# p80 a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p82 a3 +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 + +# arbre 10 +# p79a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 0 +# p80a3 +2 0 | 2 0 | 2 0 
| 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 18 +# p78a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p81 a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 17 +# p81a3 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 +# p81a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 15 +# p78a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 14 +# p80a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p81a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 + + +# arbre 20 +# p80a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p81a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 21 +# p76a3 +2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 +# p80a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 
2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 2 + +# arbre22 +# p80a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 23 +# p80a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p81a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 25 +# p82a3 +2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 +# p82a3 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 26 +# p79a3 +2 1 | 2 2 | 2 1 | 2 1 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p79a3 +2 0 | 2 1 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 \ +3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 
1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 + +# arbre 27 +# p83a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p82a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 29 +# p77a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p78a3 +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 31 +# p81a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 2 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 +# p81a3 +2 0 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 + +# arbre 36 +# p79a3 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 2 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 3 0 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 +# p79a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 1 | 2 
0 | 2 0 \ +2 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 \ +3 2 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 37 +# p77a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p78 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 + +# arbre 38 +# p79 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 + +# arbre 40 +# p78 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 2 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 2 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 42 +# p81a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 +# p81a3 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 
0 | 2 0 | 2 0 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre43 +# p80a3 +2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p81a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 45 +# p77 +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p79a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 \ +2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 + +# arbre 47 +# p79a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p80a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 + +# arbre 48 +# p79a3 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 +# p81a3 +2 0 | 2 2 | 2 0 | 2 1 | 2 1 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 0 + +# arbre 55 +# p79a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 
2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 + +# arbre56 +# p78a3 +2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 +# p80a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 1 \ +2 0 | 2 0 | 2 1 | 2 0 + +# arbre 57 +# p78a3 +2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 2 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 \ +2 0 | 2 0 +# p79a3 +2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 0 | 2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 0 | 2 1 | 2 0 \ +2 1 | 2 0 | 2 1 | 2 0 | 2 0 | 2 0 | 2 0 diff --git a/src/openalea/sequence_analysis/data/dupreziana_a1.seq b/src/openalea/sequence_analysis/data/dupreziana_a1.seq new file mode 100644 index 0000000..98ffa21 --- /dev/null +++ b/src/openalea/sequence_analysis/data/dupreziana_a1.seq @@ -0,0 +1,291 @@ +# Cypres dupreziana de 3 ans, eleves en pepiniere au Ruscas, plantes en 93 +# mesures en juin 96. Fichier contenant les troncs de 33 arbres sans mort d'apex. +# Dans ce fichier, les petits rameaux differes de la base ne sont pas pris en compte. 
+ +2 VARIABLES + +VARIABLE 1 : STATE # phyllotaxie par 4 ou par 3 +VARIABLE 2 : STATE # nombre de rameaux porte (0 a 4 ou 3 fonction de la phyllotaxie) + + +# arbre 54 +4 0 | 4 0 | 4 0 | 4 0 | 4 2 | 4 1 | 4 0 | 4 0 | 4 3 | 4 1 | 4 1 | 4 2 | 4 1 | 4 1 | 4 1 | 4 1 | 4 2 | 4 1 \ +4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 2 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 \ +3 2 | 3 0 | 3 1 | 3 0 | 3 0 + +# arbre 14 +4 0 | 4 0 | 4 4 | 4 1 | 4 2 | 4 1 | 4 0 | 4 2 | 4 2 | 4 2 | 4 3 | 4 2 | 4 1 | 4 1 | 4 2 | 4 1 | 4 0 | 4 1 \ +4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 2 | 4 1 | 4 1 | 4 1 | 4 0 \ +4 1 | 4 2 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 1 | 4 2 | 4 0 | 4 2 | 4 0 | 4 0 | 4 3 | 4 0 | 4 0 | 4 1 | 4 1 \ +4 1 | 4 1 | 4 0 | 4 0 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 2 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 \ +4 0 | 4 1 | 4 0 | 4 2 | 4 1 | 4 1 | 4 2 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 1 | 4 2 | 4 1 | 4 0 | 4 1 | 4 1 \ +4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 0 + +# arbre 15 +4 0 | 4 0 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 3 | 4 1 | 4 0 | 4 0 | 4 2 | 4 2 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 \ +4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 2 | 4 0 | 4 1 | 4 1 \ +4 1 | 4 1 | 4 1 | 4 2 | 4 1 | 4 0 | 4 2 | 4 1 | 4 1 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 2 | 4 1 | 4 0 | 4 1 \ +4 0 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 0 | 4 2 | 4 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 
0 | 3 2 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 51 +4 0 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 2 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 \ +3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 + +# arbre 50 +4 2 | 4 0 | 4 2 | 4 1 | 4 1 | 4 1 | 4 2 | 4 2 | 4 1 | 4 2 | 4 1 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 2 | 4 2 \ +4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 2 | 3 2 | 3 1 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 + +# arbre 1 +4 0 | 4 0 | 4 1 | 4 1 | 4 2 | 4 0 | 4 2 | 4 2 | 4 2 | 4 2 | 4 1 | 4 1 | 4 1 | 4 2 | 4 2 | 4 1 | 4 3 | 4 1 \ +4 2 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 2 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 1 | 3 0 | 3 1 | 3 1 | 3 
1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 + +# arbre 7 +4 1 | 4 2 | 4 3 | 4 3 | 4 4 | 4 3 | 4 2 | 4 2 | 4 3 | 4 3 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 \ +4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 \ +3 3 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 \ +3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 + +# arbre 12 +4 2 | 4 1 | 4 2 | 4 3 | 4 3 | 4 2 | 4 4 | 4 3 | 4 4 | 4 3 | 4 2 | 4 3 | 4 1 | 4 3 | 4 0 | 4 4 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 1 | 4 2 | 4 2 | 4 2 | 4 2 | 4 2 | 4 0 | 4 0 | 4 3 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 \ +4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 \ +4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 2 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 0 + +# arbre 11 +4 0 | 4 0 | 4 2 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 2 | 4 1 | 4 3 | 4 0 | 4 2 | 4 0 | 4 2 | 4 1 | 4 1 \ +4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 2 | 4 0 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 0 | 3 
1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 + +# arbre 10 +4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 2 | 4 0 | 4 3 | 4 1 | 4 3 | 4 0 | 4 1 | 4 1 | 4 2 | 4 1 | 4 0 \ +4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 2 | 3 0 | 3 0 | 3 2 | 3 1 | 3 0 | 3 0 | 3 2 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 0 + +# arbre 18 +4 0 | 4 2 | 4 2 | 4 3 | 4 1 | 4 2 | 4 2 | 4 1 | 4 1 | 4 2 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 2 | 4 0 \ +4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 \ +4 1 | 4 2 | 4 0 | 4 1 | 4 1 | 4 0 | 4 2 | 4 1 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 1 \ +4 2 | 4 0 | 4 2 | 4 0 | 4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 \ +4 1 | 4 0 | 4 2 | 4 0 | 4 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 2 | 3 0 + +# arbre 17 +4 1 | 4 1 | 4 3 | 4 0 | 4 4 | 4 0 | 4 2 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 \ +4 0 | 4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 2 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 
1 | 3 0 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 2 \ +3 0 | 3 1 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 + +# arbre 20 +4 1 | 4 1 | 4 1 | 4 2 | 4 1 | 4 3 | 4 1 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 | 4 1 | 4 1 | 4 2 | 4 0 | 4 2 \ +4 1 | 4 2 | 4 0 | 4 0 | 4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 21 +4 0 | 4 0 | 4 1 | 4 1 | 4 2 | 4 2 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 2 | 4 0 | 4 2 | 4 1 | 4 1 | 4 0 | 4 2 \ +4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 23 +4 2 | 4 3 | 4 4 | 4 4 | 4 3 | 4 3 | 4 2 | 4 3 | 4 2 | 4 3 | 4 2 | 4 2 | 4 2 | 4 1 | 4 2 | 4 0 | 4 2 | 4 0 \ +4 0 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 2 | 4 1 | 4 0 | 4 0 | 4 2 | 4 0 \ +4 2 | 4 0 | 4 0 | 4 3 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 \ +4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 
1 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 \ +4 1 | 4 0 | 4 0 | 4 0 | 4 3 | 4 1 | 4 0 | 4 0 | 4 1 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 \ +4 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 25 +4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 3 | 4 1 | 4 2 | 4 2 | 4 2 | 4 2 | 4 1 | 4 1 | 4 2 | 4 1 | 4 2 | 4 1 | 4 2 \ +4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 | 3 0 | 3 2 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 26 +4 1 | 4 2 | 4 3 | 4 3 | 4 3 | 4 4 | 4 3 | 4 1 | 4 1 | 4 1 | 4 3 | 4 1 | 4 1 | 4 2 | 4 1 | 4 2 | 4 1 | 4 1 \ +4 2 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 2 | 4 0 | 4 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 3 | 3 0 | 3 1 | 3 3 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 2 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 27 +4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 2 | 4 1 | 4 0 | 4 3 | 4 2 | 4 1 | 4 1 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 \ +4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 
0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 3 | 3 0 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 3 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 \ +3 0 | 3 0 + +# arbre 29 +4 0 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 0 \ +4 1 | 4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 1 | 4 0 | 4 0 | 4 2 | 4 1 | 4 1 | 4 0 \ +4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 31 +4 2 | 4 3 | 4 2 | 4 3 | 4 1 | 4 4 | 4 3 | 4 2 | 4 3 | 4 3 | 4 1 | 4 2 | 4 3 | 4 2 | 4 2 | 4 1 | 4 1 | 4 3 \ +4 1 | 4 1 | 4 2 | 4 1 | 4 1 | 4 2 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 0 | 3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 3 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 2 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 2 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 2 | 3 1 | 3 0 | 3 0 \ +3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 \ +3 0 + +# arbre 36 +4 2 | 4 4 | 4 3 | 4 4 | 4 3 | 4 3 | 4 3 | 4 3 | 4 4 | 4 3 | 4 3 | 4 1 | 4 4 | 4 4 | 4 4 | 4 1 | 4 1 | 4 3 \ +4 3 | 4 2 | 4 2 | 4 2 | 3 0 | 3 3 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 
1 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 2 | 3 1 | 3 0 | 3 2 | 3 1 | 3 2 | 3 0 | 3 1 | 3 3 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 3 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 \ +3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 37 +4 1 | 4 0 | 4 0 | 4 1 | 4 2 | 4 3 | 4 0 | 4 4 | 4 1 | 4 1 | 4 1 | 4 2 | 4 2 | 4 2 | 4 1 | 4 1 | 4 1 | 4 2 \ +4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 2 | 4 0 | 4 0 \ +4 2 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 \ +3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 \ +3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 38 +4 0 | 4 0 | 4 0 | 4 2 | 4 1 | 4 1 | 4 1 | 4 1 | 4 2 | 4 2 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 \ +4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 3 | 4 0 | 4 1 | 4 0 | 4 3 | 4 0 | 4 2 | 4 0 | 4 1 \ +4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 | 4 0 | 4 2 | 4 1 | 4 0 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 0 | 3 0 + +# arbre 40 +4 1 | 4 2 | 4 3 | 4 3 | 4 2 | 4 1 | 4 2 | 4 0 | 4 1 | 4 3 | 4 0 | 4 2 | 4 0 | 4 3 | 3 1 | 3 2 | 3 0 | 3 2 \ +3 3 | 3 2 | 3 0 | 3 0 | 3 1 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 0 | 3 
1 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 1 | 3 1 | 3 0 | 3 0 | 3 2 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 \ +3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 42 +4 0 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 2 | 4 2 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 0 | 4 3 \ +4 0 | 4 3 | 4 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 \ +3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 3 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 43 +4 0 | 4 1 | 4 2 | 4 2 | 4 2 | 4 2 | 4 1 | 4 1 | 4 1 | 4 2 | 4 2 | 4 2 | 4 3 | 4 1 | 4 1 | 4 1 | 4 1 | 4 2 \ +4 0 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 0 | 4 4 | 4 0 \ +4 0 | 4 2 | 4 1 | 4 2 | 4 1 | 4 0 | 4 2 | 4 0 | 4 2 | 4 0 | 4 2 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 1 | 4 1 | 4 2 | 4 0 | 4 0 | 4 2 | 4 0 | 4 0 | 4 0 | 4 1 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 \ +4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 45 +4 0 | 4 2 | 4 1 | 4 1 | 4 3 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 \ +4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 2 | 4 0 | 4 0 \ +4 2 | 4 1 | 4 0 | 4 2 | 3 0 | 3 2 | 3 0 | 3 1 | 3 2 | 3 0 | 3 1 | 3 2 | 3 0 | 3 2 | 3 1 | 3 1 | 3 0 | 3 2 \ +3 
0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 2 | 3 0 \ +3 1 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 \ +3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 47 +4 0 | 4 0 | 4 2 | 4 2 | 4 1 | 4 1 | 4 1 | 4 2 | 4 0 | 4 0 | 4 1 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 \ +4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 0 | 4 0 | 4 3 | 4 0 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 \ +4 1 | 4 0 | 4 1 | 4 0 | 4 3 | 4 0 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 \ +4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 | 4 2 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 0 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 48 +4 2 | 4 4 | 4 4 | 4 4 | 4 2 | 4 2 | 4 4 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 2 | 4 1 | 4 0 | 4 0 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 0 | 4 3 | 4 3 | 4 1 | 4 0 | 4 0 | 4 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 1 | 3 0 \ +3 1 | 3 1 | 3 0 | 3 2 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 \ +3 0 + +# arbre 52 +4 0 | 4 0 | 4 0 | 4 1 | 4 2 | 4 1 | 4 0 | 4 2 | 4 0 | 4 1 | 4 2 | 4 0 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 \ +4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 2 | 3 
0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 0 | 3 2 | 3 0 \ +3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 55 +4 0 | 4 1 | 4 0 | 4 0 | 4 2 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 2 | 4 0 | 4 0 | 4 1 | 4 1 | 4 0 \ +4 1 | 4 0 | 4 0 | 4 2 | 4 0 | 3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 1 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 1 | 3 2 | 3 0 | 3 1 \ +3 0 | 3 1 | 3 0 | 3 1 | 3 2 | 3 0 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 | 3 0 | 3 0 \ +3 2 | 3 1 | 3 0 | 3 1 | 3 1 | 3 0 | 3 2 | 3 0 | 3 0 | 3 2 | 3 0 | 3 1 | 3 1 | 3 0 | 3 1 | 3 0 | 3 1 | 3 1 \ +3 0 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 56 +4 0 | 4 2 | 4 0 | 4 3 | 4 0 | 4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 1 | 4 3 | 4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 1 \ +4 0 | 4 1 | 4 0 | 4 0 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 3 | 4 0 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 1 \ +4 1 | 4 1 | 4 0 | 4 3 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 0 | 4 1 | 4 0 \ +4 3 | 4 0 | 4 1 | 4 0 | 4 1 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 3 1 | 3 0 | 3 0 | 3 1 \ +3 2 | 3 2 | 3 0 | 3 0 | 3 2 | 3 1 | 3 1 | 3 0 | 3 0 | 3 1 | 3 1 | 3 1 | 3 2 | 3 0 | 3 0 | 3 1 | 3 0 | 3 1 \ +3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 1 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 | 3 0 + +# arbre 57 +4 0 | 4 0 | 4 0 | 4 2 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 2 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 \ +4 0 | 4 1 | 4 0 | 4 1 | 4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 2 \ +4 0 | 4 2 | 4 0 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 | 4 1 \ +4 0 | 4 1 | 4 1 | 4 0 | 4 1 | 4 1 | 4 1 | 4 0 | 4 1 | 4 0 | 4 0 | 4 3 | 4 0 | 4 0 | 4 1 | 4 0 | 4 3 | 4 0 \ +4 0 | 4 2 | 4 1 | 4 2 | 4 0 | 4 1 | 4 2 | 4 2 | 4 1 | 4 0 | 4 1 | 4 
0 | 4 2 | 4 0 | 4 1 | 4 0 | 4 1 | 4 0 \ +4 1 | 4 0 | 4 0 | 4 0 | 4 0 | 4 0 + diff --git a/src/openalea/sequence_analysis/data/elstar1.hsc b/src/openalea/sequence_analysis/data/elstar1.hsc new file mode 100644 index 0000000..52e09b4 --- /dev/null +++ b/src/openalea/sequence_analysis/data/elstar1.hsc @@ -0,0 +1,89 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.5 0.5 0.0 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.3 0.3 0.2 0.2 0.0 0.0 0.0 +0.0 0.0 0.3 0.3 0.2 0.2 0.0 0.0 +0.0 0.0 0.0 0.3 0.3 0.2 0.2 0.0 +0.0 0.0 0.0 0.0 0.3 0.3 0.2 0.2 +0.0 0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 6 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.3 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.1 + +STATE 5 
OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.3 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.1 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/elstar1.seq b/src/openalea/sequence_analysis/data/elstar1.seq new file mode 100644 index 0000000..4d152f2 --- /dev/null +++ b/src/openalea/sequence_analysis/data/elstar1.seq @@ -0,0 +1,67 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1058 +# mean: 1.18053 variance: 2.51326 standard deviation: 1.58533 + +# | value histogram +# 0 633 latent bud +# 1 41 short shoot +# 2 121 long shoot +# 3 86 fruiting shoot +# 4 177 immediate shoot + +# sequence length histogram - size of the sample: 16 +# mean: 66.125 variance: 81.3167 standard deviation: 9.01758 + +# cumulative length: 1058 + +0 0 2 2 2 2 2 1 1 0 0 4 3 1 0 3 3 3 3 1 0 3 3 2 3 0 3 0 4 4 4 4 4 0 0 0 0 4 4 0 0 \ +4 0 0 0 0 4 0 0 4 4 0 0 0 0 0 0 0 0 0 0 2 0 2 2 1 1 1 1 1 1 1 + +0 2 2 3 3 3 0 3 3 0 3 0 0 3 4 4 4 4 4 4 4 0 4 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 4 4 \ +0 4 4 4 4 0 1 2 1 1 0 1 1 2 0 1 1 0 + +0 0 0 0 0 0 0 0 0 3 0 0 2 0 3 3 2 3 1 1 3 2 4 4 4 4 4 4 4 4 4 4 4 0 0 4 4 0 0 0 0 \ +0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 2 3 2 2 2 2 1 0 4 4 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 3 3 0 3 3 3 3 2 2 3 0 0 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 \ +2 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 2 1 0 1 2 1 3 1 3 3 0 1 1 0 0 4 3 0 4 0 1 2 2 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 + +0 2 3 3 3 3 3 3 0 0 0 0 0 0 4 4 4 4 4 4 4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 \ +0 2 0 0 4 4 2 0 4 0 2 0 2 0 0 2 0 0 0 + +0 0 0 0 0 2 2 3 3 3 0 0 3 0 3 3 0 0 0 3 2 0 2 1 4 4 4 4 4 4 
4 4 4 4 4 4 4 4 4 4 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 2 2 0 2 0 2 0 0 0 0 0 0 0 + +0 0 0 2 3 3 3 2 0 0 0 0 0 0 0 0 3 4 4 4 4 4 4 4 4 4 0 4 4 4 4 0 0 0 0 0 0 2 0 0 0 \ +0 0 2 0 0 0 0 2 4 0 0 2 2 2 0 0 2 2 0 0 0 0 + +0 2 3 3 3 1 3 0 3 1 0 0 0 3 3 4 4 4 4 4 4 4 4 4 0 4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 4 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 2 2 2 1 1 0 3 1 3 1 4 4 4 3 3 3 3 0 0 2 3 2 1 0 1 0 0 4 4 4 4 4 4 0 4 0 4 4 0 \ +4 0 0 0 0 0 0 0 4 0 0 0 0 0 0 4 0 4 4 0 2 0 0 4 0 0 0 0 0 0 + +0 2 3 3 0 3 3 0 3 3 3 3 3 3 0 3 0 0 2 2 2 2 0 2 2 0 0 2 4 4 0 4 4 0 0 4 4 0 0 0 2 \ +0 0 2 4 2 0 0 4 0 2 0 2 0 2 0 0 2 0 0 0 0 0 0 0 + +0 2 2 2 2 2 2 2 2 2 1 1 2 0 0 0 0 4 4 4 4 4 4 4 4 4 4 4 4 0 4 4 0 4 0 0 0 0 0 2 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 0 + +1 2 2 2 2 2 3 0 3 0 3 3 3 3 3 0 0 0 3 4 3 4 4 0 4 0 0 0 0 0 0 0 0 0 0 0 2 4 4 4 4 \ +4 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 2 0 0 2 4 4 4 0 2 \ +0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 2 3 0 0 0 3 4 4 4 4 4 4 4 4 0 4 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +2 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/fagus1.his b/src/openalea/sequence_analysis/data/fagus1.his new file mode 100644 index 0000000..858666d --- /dev/null +++ b/src/openalea/sequence_analysis/data/fagus1.his @@ -0,0 +1,11 @@ + 0 0 + 1 0 + 2 0 + 3 1 + 4 1 + 5 2 + 6 8 + 7 22 + 8 33 + 9 20 + 10 5 diff --git a/src/openalea/sequence_analysis/data/fuji1.hsc b/src/openalea/sequence_analysis/data/fuji1.hsc new file mode 100644 index 0000000..d153ad3 --- /dev/null +++ b/src/openalea/sequence_analysis/data/fuji1.hsc @@ -0,0 +1,78 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +7 STATES + +INITIAL_PROBABILITIES +0.5 0.5 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 1.0 
+0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.3 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/fuji1.seq b/src/openalea/sequence_analysis/data/fuji1.seq new file mode 100644 index 0000000..92e6082 --- /dev/null +++ b/src/openalea/sequence_analysis/data/fuji1.seq @@ -0,0 +1,64 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1109 +# mean: 0.934175 variance: 1.8666 standard deviation: 1.36624 + +# | value histogram +# 0 689 latent bud +# 1 78 short shoot +# 2 178 long shoot +# 3 54 fruiting shoot +# 4 110 immediate shoot + +# sequence length histogram - size of the sample: 15 +# mean: 73.9333 variance: 20.781 standard deviation: 
4.55861 + +# cumulative length: 1109 + +0 0 0 0 2 2 3 0 3 0 0 3 0 3 0 0 3 4 4 4 4 4 4 4 4 4 4 3 0 0 0 0 0 0 0 0 0 0 1 2 0 \ +0 2 0 2 0 0 2 0 2 0 0 0 0 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 3 1 2 1 3 3 0 3 1 2 0 1 4 4 4 1 4 4 0 0 1 0 0 0 0 0 4 0 0 0 0 0 \ +4 0 4 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 2 3 2 2 0 3 3 1 0 1 0 4 0 3 3 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 4 2 2 2 2 2 2 \ +2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 2 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 2 0 3 0 0 0 0 0 0 2 0 0 0 0 0 3 2 2 2 0 0 0 0 0 0 0 0 0 1 0 1 2 1 2 2 2 2 \ +1 2 1 2 2 2 2 2 2 0 2 2 2 0 2 0 2 1 0 2 0 0 0 0 0 0 0 0 0 + +0 0 0 2 2 2 2 2 0 2 2 0 0 3 1 1 0 1 0 1 2 4 4 4 4 0 2 0 0 4 4 4 4 0 2 0 4 0 0 4 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 2 2 2 1 2 1 1 2 1 1 1 0 0 4 4 1 4 2 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 2 0 \ +1 2 0 1 2 2 2 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 2 2 3 2 0 0 0 4 0 0 4 4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 4 0 0 4 0 0 \ +0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 0 0 4 4 4 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 2 0 0 0 0 0 0 + +2 2 2 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 4 1 4 4 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 1 0 0 1 0 1 1 0 1 0 1 0 1 2 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 + +0 0 0 0 0 0 2 2 3 1 1 2 1 1 1 3 3 2 3 1 2 1 3 2 1 4 4 4 4 4 4 4 4 0 4 1 0 0 0 0 0 \ +0 1 4 0 0 4 0 2 2 0 0 0 0 0 0 0 2 0 0 0 1 1 1 2 2 1 1 2 2 2 1 0 0 0 1 0 0 0 0 0 0 \ +0 + +0 0 2 2 0 2 1 0 0 1 2 0 2 0 1 1 3 1 3 0 0 0 1 1 4 4 4 4 0 0 4 0 4 0 0 4 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 2 2 0 2 2 0 0 0 2 0 0 2 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 2 0 0 0 0 0 0 0 3 0 3 0 0 3 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +2 0 0 0 0 2 0 2 0 0 0 0 0 2 0 2 2 2 2 0 0 2 0 2 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 3 0 3 0 0 3 3 3 3 4 4 4 4 0 4 4 0 0 0 0 4 0 0 0 4 0 0 4 0 4 0 0 \ +4 0 0 0 0 0 0 0 0 0 2 0 0 
2 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 2 2 2 2 2 1 1 3 0 0 4 4 4 0 4 4 0 4 4 4 4 0 0 0 0 4 4 0 2 0 4 4 0 4 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 2 3 0 2 3 1 0 2 0 3 3 3 2 4 4 4 4 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 2 0 0 2 0 0 2 2 0 0 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/gala1.hsc b/src/openalea/sequence_analysis/data/gala1.hsc new file mode 100644 index 0000000..471b685 --- /dev/null +++ b/src/openalea/sequence_analysis/data/gala1.hsc @@ -0,0 +1,78 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +7 STATES + +INITIAL_PROBABILITIES +0.5 0.5 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.2 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.1 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 4 OBSERVATION_DISTRIBUTION 
+OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.3 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/gala1.seq b/src/openalea/sequence_analysis/data/gala1.seq new file mode 100644 index 0000000..9078856 --- /dev/null +++ b/src/openalea/sequence_analysis/data/gala1.seq @@ -0,0 +1,69 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1190 +# mean: 1.38992 variance: 2.91764 standard deviation: 1.70811 + +# | value histogram +# 0 681 latent bud +# 1 26 short shoot +# 2 71 long shoot +# 3 162 fruiting shoot +# 4 250 immediate shoot + +# sequence length histogram - size of the sample: 17 +# mean: 70 variance: 17.125 standard deviation: 4.13824 + +# cumulative length: 1190 + +2 0 0 2 3 3 3 3 0 0 1 1 1 3 3 3 3 3 3 0 0 3 0 3 0 3 4 4 0 0 0 0 4 3 4 4 4 0 4 1 0 \ +0 0 0 0 0 0 0 0 0 1 0 4 2 0 0 0 4 0 3 4 2 0 1 0 0 0 0 0 0 4 4 0 4 0 0 0 0 + +0 2 2 2 3 3 2 3 1 3 3 3 3 4 4 4 1 1 1 1 0 4 4 4 1 0 0 4 0 4 4 0 1 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 3 1 3 3 3 3 3 3 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 0 0 0 0 0 \ +0 0 2 0 0 0 4 0 0 0 0 2 0 2 4 0 4 0 0 0 0 0 0 0 0 0 0 + +0 0 2 2 1 0 1 3 3 1 3 3 3 3 3 3 0 3 3 4 4 4 4 4 4 4 0 0 0 0 0 4 4 4 4 4 4 4 4 0 4 \ +0 0 0 0 0 0 0 0 2 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 3 3 1 3 0 0 3 0 0 2 0 3 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 4 4 4 0 4 0 4 \ +0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 0 3 3 3 3 3 3 3 0 3 0 3 0 4 4 4 4 4 4 4 0 0 0 0 0 0 0 4 4 0 4 0 0 0 0 0 0 0 \ +2 0 0 0 0 0 0 0 0 2 4 4 4 0 0 2 0 0 4 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 2 3 0 0 3 0 0 0 0 3 0 0 0 0 0 0 0 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 4 4 0 4 0 0 4 4 4 4 0 0 0 0 0 4 0 0 4 0 4 0 0 4 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 3 3 0 3 3 3 0 3 3 4 4 4 4 
3 0 4 4 4 4 0 4 4 4 4 4 \ +4 4 4 4 0 4 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 + +0 2 2 2 1 3 3 3 3 0 3 0 3 3 3 0 3 0 3 4 4 4 4 0 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 \ +0 4 4 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 2 3 3 3 3 4 4 4 4 4 3 3 3 0 0 0 4 4 4 4 4 4 0 4 4 0 0 0 0 0 0 0 0 0 0 2 \ +0 0 0 0 2 0 0 2 0 0 2 2 0 0 0 0 0 0 0 0 2 0 0 + +0 0 0 2 2 3 3 3 3 3 3 3 3 3 3 0 0 0 0 0 0 0 4 4 4 4 4 0 0 0 0 0 0 0 0 0 4 4 4 4 4 \ +4 4 4 4 0 4 4 0 4 0 0 0 0 0 0 0 0 0 2 2 0 2 0 + +0 2 3 3 3 3 3 3 3 3 3 3 0 4 4 4 4 4 4 4 0 0 0 0 0 4 4 4 0 4 0 0 3 0 0 0 0 0 0 0 0 \ +4 2 0 0 0 0 2 2 2 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 3 0 0 0 3 3 3 3 3 3 3 3 0 0 4 4 4 4 4 4 4 4 0 0 0 0 0 0 4 4 4 4 4 4 4 4 \ +4 4 0 0 0 0 2 0 0 0 1 2 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 + +2 0 0 0 0 0 0 3 0 3 3 0 3 3 0 3 0 3 0 3 3 4 4 4 0 4 4 4 4 4 4 4 4 4 4 4 4 4 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 3 3 0 3 0 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 4 4 4 4 4 4 4 0 0 0 \ +0 0 0 0 0 0 2 0 0 0 0 0 2 0 2 2 0 0 2 0 0 0 0 2 0 0 0 + +0 1 1 0 0 0 3 0 3 3 3 3 0 3 3 0 4 4 4 4 4 4 0 0 0 4 4 4 4 4 4 4 4 4 4 0 4 0 0 4 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 4 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 3 3 3 3 3 3 3 3 3 0 3 3 4 4 3 2 3 0 0 0 0 0 0 0 0 0 4 4 0 3 0 3 0 0 3 0 3 \ +2 0 3 3 3 2 0 1 2 2 1 0 0 0 4 4 0 4 0 0 4 4 0 4 0 4 0 0 4 0 0 0 diff --git a/src/openalea/sequence_analysis/data/granny1.hsc b/src/openalea/sequence_analysis/data/granny1.hsc new file mode 100644 index 0000000..2c1b93d --- /dev/null +++ b/src/openalea/sequence_analysis/data/granny1.hsc @@ -0,0 +1,89 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.5 0.5 0.0 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 
0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 6 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.2 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.1 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.3 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.1 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.3 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/granny1.seq b/src/openalea/sequence_analysis/data/granny1.seq new file mode 100644 index 0000000..c7c4d6a --- /dev/null +++ b/src/openalea/sequence_analysis/data/granny1.seq @@ -0,0 +1,66 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1201 +# mean: 1.21232 variance: 2.61238 standard deviation: 1.61629 + +# | value histogram +# 0 
728 latent bud +# 1 18 short shoot +# 2 138 long shoot +# 3 106 fruiting shoot +# 4 211 immediate shoot + +# sequence length histogram - size of the sample: 16 +# mean: 75.0625 variance: 31.5292 standard deviation: 5.61508 + +# cumulative length: 1201 + +0 2 2 2 2 2 0 2 3 3 0 3 0 3 4 0 0 0 0 3 0 0 4 0 0 0 0 0 0 0 4 4 4 4 4 4 0 0 0 0 4 \ +0 2 4 4 0 0 0 0 0 0 0 0 4 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 1 2 2 2 2 2 0 0 3 0 3 2 3 0 3 0 3 3 4 4 0 4 0 4 0 3 3 0 0 0 4 0 0 4 4 0 0 0 \ +0 0 0 0 0 0 0 0 4 0 1 0 0 1 0 0 0 0 0 0 0 0 2 0 0 0 1 0 0 1 0 0 0 0 0 0 0 + +0 0 0 0 0 0 2 0 2 2 2 2 2 2 2 0 3 3 3 3 3 3 3 0 3 3 0 0 0 0 0 0 0 4 4 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 4 4 4 4 0 2 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 0 3 0 0 0 0 0 0 0 0 4 4 4 4 0 4 0 0 0 0 4 4 4 4 \ +0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 4 0 0 4 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 2 2 2 2 2 2 0 2 2 2 3 3 2 3 3 0 3 3 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 4 \ +4 4 4 4 4 4 4 4 0 0 0 0 4 0 0 4 0 2 0 0 0 0 0 0 1 4 4 0 0 0 0 4 0 0 0 0 0 0 0 0 0 + +0 0 2 2 2 2 2 2 3 3 3 2 0 3 0 0 0 0 4 3 0 0 0 4 4 4 4 4 4 4 4 4 4 0 0 4 4 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 2 2 2 3 2 3 3 3 3 0 3 3 0 0 0 3 0 0 0 2 0 3 3 2 4 0 4 4 4 4 4 4 4 \ +4 4 0 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 2 4 4 2 2 0 0 0 0 0 0 0 + +0 0 0 0 2 2 2 0 2 2 3 2 0 3 1 2 3 0 4 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 2 2 4 0 4 3 0 0 0 3 4 4 4 0 0 3 0 0 0 0 0 4 4 4 4 0 4 0 4 4 0 0 0 0 0 0 \ +0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 2 2 2 2 2 3 0 3 0 0 0 3 0 3 0 3 3 3 0 0 0 0 4 4 4 4 4 4 4 4 4 4 4 4 0 0 4 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 2 4 4 4 0 2 0 2 0 0 0 0 0 0 + +0 0 2 2 2 2 0 2 0 3 3 3 0 3 3 3 3 0 3 0 0 0 0 0 0 2 0 3 0 4 0 4 4 4 4 4 4 0 0 0 0 \ +0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 0 4 2 0 0 0 0 0 0 0 0 + +2 2 2 2 2 2 2 2 2 3 3 0 3 3 3 3 3 3 3 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 \ +4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 2 2 0 2 2 0 2 2 3 0 0 3 0 3 3 3 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 0 4 4 0 \ +0 0 0 0 1 1 1 1 0 2 0 1 0 1 2 0 0 0 1 2 2 1 0 1 2 0 1 0 0 2 0 1 0 0 0 0 + +0 2 2 2 2 2 3 2 3 3 0 3 3 3 3 0 3 0 3 3 3 3 0 0 0 0 4 4 4 4 4 4 4 4 0 0 4 3 0 0 4 \ +4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 2 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 2 2 2 2 2 2 3 3 3 3 3 3 3 0 0 0 0 0 3 3 3 0 0 0 3 4 4 4 4 4 4 4 4 4 4 4 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 2 2 2 2 0 0 2 0 2 2 2 0 0 0 2 0 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/laricio_date66.seq b/src/openalea/sequence_analysis/data/laricio_date66.seq new file mode 100644 index 0000000..5d4bb44 --- /dev/null +++ b/src/openalea/sequence_analysis/data/laricio_date66.seq @@ -0,0 +1,65 @@ +3 VARIABLES + +VARIABLE 1 : STATE # !!! was TIME. 
Switched to STATE otherwise reading function fails +VARIABLE 2 : STATE # longueur de l'UC +VARIABLE 3 : STATE # nombre de branches par UC + +27 330 7 | 28 290 8 | 29 250 6 | 30 320 5 | 31 440 5 | 32 440 7 | 33 480 4 | 34 470 6 \ +35 570 5 | 36 640 7 | 37 700 6 | 38 650 1 | 39 590 5 | 40 680 6 | 41 640 5 | 42 630 6 \ +43 530 5 | 44 540 5 | 45 670 7 | 46 750 7 | 47 530 5 | 48 820 6 | 49 590 6 | 50 650 5 \ +51 650 6 | 52 700 6 | 53 710 5 | 54 660 6 | 55 650 5 | 56 660 4 | 57 620 4 | 58 510 5 \ +59 630 6 | 60 760 4 | 61 540 1 | 62 360 4 | 63 500 4 | 64 460 5 | 65 460 4 | 66 520 4 \ +67 610 5 | 68 560 5 | 69 550 5 | 70 540 5 | 71 560 3 | 72 380 3 | 73 350 3 | 74 450 2 \ +75 180 4 | 76 470 5 | 77 350 5 | 78 520 2 | 79 390 5 | 80 420 5 | 81 510 5 | 82 240 0 \ +83 250 8 | 84 290 3 | 85 280 2 | 86 190 4 | 87 230 3 | 88 400 4 | 89 380 5 | 90 240 4 \ +91 180 3 | 92 210 4 # (1) + +27 110 1 | 28 210 3 | 29 180 3 | 30 170 4 | 31 270 4 | 32 360 6 | 33 410 5 | 34 390 6 \ +35 410 6 | 36 600 6 | 37 590 5 | 38 630 6 | 39 640 7 | 40 610 5 | 41 600 8 | 42 620 7 \ +43 650 6 | 44 680 7 | 45 710 4 | 46 830 7 | 47 660 8 | 48 870 7 | 49 630 7 | 50 520 6 \ +51 610 6 | 52 630 7 | 53 790 5 | 54 640 6 | 55 640 6 | 56 680 7 | 57 560 5 | 58 670 8 \ +59 700 5 | 60 580 5 | 61 410 4 | 62 550 5 | 63 370 4 | 64 380 5 | 65 510 6 | 66 360 6 \ +67 610 6 | 68 490 6 | 69 470 5 | 70 610 6 | 71 450 5 | 72 370 5 | 73 380 6 | 74 400 4 \ +75 410 4 | 76 470 4 | 77 470 5 | 78 500 4 | 79 450 5 | 80 440 6 | 81 450 5 | 82 490 5 \ +83 430 6 | 84 350 5 | 85 300 5 | 86 130 3 | 87 110 5 | 88 210 4 | 89 270 4 | 90 200 3 \ +91 90 3 | 92 160 3 # (2) + +27 160 4 | 28 200 5 | 29 260 3 | 30 200 4 | 31 220 4 | 32 290 5 | 33 340 5 | 34 370 1 \ +35 320 5 | 36 370 4 | 37 440 5 | 38 580 7 | 39 430 2 | 40 500 5 | 41 340 6 | 42 340 5 \ +43 440 4 | 44 380 4 | 45 320 5 | 46 430 5 | 47 350 5 | 48 490 5 | 49 520 6 | 50 500 4 \ +51 550 5 | 52 660 5 | 53 620 5 | 54 800 5 | 55 660 5 | 56 670 6 | 57 580 7 | 58 450 6 \ +59 390 5 | 60 630 5 | 
61 550 7 | 62 750 7 | 63 530 6 | 64 560 6 | 65 640 6 | 66 500 7 \ +67 570 6 | 68 540 6 | 69 530 6 | 70 640 5 | 71 490 5 | 72 640 5 | 73 420 4 | 74 520 4 \ +75 530 6 | 76 540 5 | 77 380 3 | 78 430 4 | 79 490 6 | 80 450 4 | 81 500 7 | 82 520 5 \ +83 500 7 | 84 510 6 | 85 360 4 | 86 190 5 | 87 220 6 | 88 240 4 | 89 240 5 | 90 210 0 \ +91 130 2 | 92 130 2 # (3) + +27 210 4 | 28 230 6 | 29 360 5 | 30 710 6 | 31 830 6 | 32 550 6 | 33 670 6 | 34 650 8 \ +35 810 8 | 36 780 7 | 37 820 7 | 38 900 8 | 39 670 8 | 40 710 8 | 41 670 8 | 42 820 8 \ +43 820 6 | 44 640 6 | 45 570 7 | 46 680 6 | 47 520 6 | 48 600 5 | 49 530 6 | 50 560 6 \ +51 760 6 | 52 680 8 | 53 490 6 | 54 440 4 | 55 410 5 | 56 540 4 | 57 360 6 | 58 540 5 \ +59 600 6 | 60 380 5 | 61 470 5 | 62 500 3 | 63 210 2 | 64 240 5 | 65 330 4 | 66 290 4 \ +67 430 3 | 68 250 4 | 69 430 5 | 70 450 5 | 71 410 6 | 72 360 4 | 73 310 4 | 74 420 4 \ +75 430 5 | 76 270 4 | 77 140 2 | 78 220 4 | 79 290 4 | 80 50 4 | 81 310 4 | 82 230 4 \ +83 180 4 | 84 130 2 | 85 120 2 | 86 100 1 | 87 80 2 | 88 160 4 | 89 200 2 | 90 100 2 \ +91 110 2 | 92 190 4 # (4) + +27 110 3 | 28 180 4 | 29 100 5 | 30 210 6 | 31 500 9 | 32 570 8 | 33 590 9 | 34 610 10 \ +35 700 10 | 36 700 11 | 37 640 12 | 38 580 9 | 39 630 11 | 40 670 9 | 41 470 7 | 42 570 11 \ +43 740 9 | 44 610 11 | 45 660 13 | 46 670 8 | 47 500 8 | 48 550 7 | 49 540 10 | 50 490 7 \ +51 640 8 | 52 550 9 | 53 570 7 | 54 480 7 | 55 480 9 | 56 630 7 | 57 460 10 | 58 640 9 \ +59 610 9 | 60 480 8 | 61 490 8 | 62 560 9 | 63 410 5 | 64 420 6 | 65 470 5 | 66 420 5 \ +67 540 7 | 68 420 5 | 69 540 7 | 70 480 5 | 71 530 7 | 72 520 7 | 73 390 8 | 74 480 4 \ +75 320 3 | 76 390 6 | 77 290 4 | 78 240 3 | 79 260 5 | 80 340 5 | 81 250 4 | 82 420 7 \ +83 360 7 | 84 320 6 | 85 300 6 | 86 290 6 | 87 260 5 | 88 320 5 | 89 330 8 | 90 350 8 \ +91 120 2 | 92 190 5 # (5) + +27 130 2 | 28 380 5 | 29 220 5 | 30 410 5 | 31 510 7 | 32 530 5 | 33 610 5 | 34 720 8 \ +35 540 5 | 36 600 5 | 37 510 7 | 38 610 7 | 39 540 6 | 40 
580 5 | 41 440 8 | 42 540 6 \ +43 470 6 | 44 480 6 | 45 450 6 | 46 510 6 | 47 360 4 | 48 470 5 | 49 380 4 | 50 350 4 \ +51 440 5 | 52 500 5 | 53 440 4 | 54 410 4 | 55 420 5 | 56 590 6 | 57 350 5 | 58 530 4 \ +59 460 4 | 60 300 4 | 61 500 4 | 62 370 4 | 63 190 2 | 64 240 4 | 65 490 4 | 66 390 4 \ +67 520 5 | 68 490 4 | 69 500 4 | 70 570 6 | 71 410 4 | 72 500 5 | 73 370 3 | 74 420 5 \ +75 390 4 | 76 210 4 | 77 140 1 | 78 150 4 | 79 290 4 | 80 170 4 | 81 210 3 | 82 310 4 \ +83 120 1 | 84 250 4 | 85 140 3 | 86 130 3 | 87 110 2 | 88 200 3 | 89 210 4 | 90 270 3 \ +91 130 4 | 92 180 3 # (6) diff --git a/src/openalea/sequence_analysis/data/laricio_position66.seq b/src/openalea/sequence_analysis/data/laricio_position66.seq new file mode 100644 index 0000000..a217d30 --- /dev/null +++ b/src/openalea/sequence_analysis/data/laricio_position66.seq @@ -0,0 +1,49 @@ +# pins laricio de 75 ans + +2 VARIABLES + +VARIABLE 1 : POSITION_INTERVAL # longueur de l'UC +VARIABLE 2 : STATE # nombre de branches par UC + +330 7 | 290 8 | 250 6 | 320 5 | 440 5 | 440 7 | 480 4 | 470 6 | 570 5 | 640 7 | 700 6 \ +650 1 | 590 5 | 680 6 | 640 5 | 630 6 | 530 5 | 540 5 | 670 7 | 750 7 | 530 5 | 820 6 \ +590 6 | 650 5 | 650 6 | 700 6 | 710 5 | 660 6 | 650 5 | 660 4 | 620 4 | 510 5 | 630 6 \ +760 4 | 540 1 | 360 4 | 500 4 | 460 5 | 460 4 | 520 4 | 610 5 | 560 5 | 550 5 | 540 5 \ +560 3 | 380 3 | 350 3 | 450 2 | 180 4 | 470 5 | 350 5 | 520 2 | 390 5 | 420 5 | 510 5 \ +240 0 | 250 8 | 290 3 | 280 2 | 190 4 | 230 3 | 400 4 | 380 5 | 240 4 | 180 3 | 210 4 | 0 + +110 1 | 210 3 | 180 3 | 170 4 | 270 4 | 360 6 | 410 5 | 390 6 | 410 6 | 600 6 | 590 5 \ +630 6 | 640 7 | 610 5 | 600 8 | 620 7 | 650 6 | 680 7 | 710 4 | 830 7 | 660 8 | 870 7 \ +630 7 | 520 6 | 610 6 | 630 7 | 790 5 | 640 6 | 640 6 | 680 7 | 560 5 | 670 8 | 700 5 \ +580 5 | 410 4 | 550 5 | 370 4 | 380 5 | 510 6 | 360 6 | 610 6 | 490 6 | 470 5 | 610 6 \ +450 5 | 370 5 | 380 6 | 400 4 | 410 4 | 470 4 | 470 5 | 500 4 | 450 5 | 440 6 | 450 5 \ +490 5 | 
430 6 | 350 5 | 300 5 | 130 3 | 110 5 | 210 4 | 270 4 | 200 3 | 90 3 | 160 3 | 0 + +160 4 | 200 5 | 260 3 | 200 4 | 220 4 | 290 5 | 340 5 | 370 1 | 320 5 | 370 4 | 440 5 \ +580 7 | 430 2 | 500 5 | 340 6 | 340 5 | 440 4 | 380 4 | 320 5 | 430 5 | 350 5 | 490 5 \ +520 6 | 500 4 | 550 5 | 660 5 | 620 5 | 800 5 | 660 5 | 670 6 | 580 7 | 450 6 | 390 5 \ +630 5 | 550 7 | 750 7 | 530 6 | 560 6 | 640 6 | 500 7 | 570 6 | 540 6 | 530 6 | 640 5 \ +490 5 | 640 5 | 420 4 | 520 4 | 530 6 | 540 5 | 380 3 | 430 4 | 490 6 | 450 4 | 500 7 \ +520 5 | 500 7 | 510 6 | 360 4 | 190 5 | 220 6 | 240 4 | 240 5 | 210 0 | 130 2 | 130 2 | 0 + +210 4 | 230 6 | 360 5 | 710 6 | 830 6 | 550 6 | 670 6 | 650 8 | 810 8 | 780 7 | 820 7 \ +900 8 | 670 8 | 710 8 | 670 8 | 820 8 | 820 6 | 640 6 | 570 7 | 680 6 | 520 6 | 600 5 \ +530 6 | 560 6 | 760 6 | 680 8 | 490 6 | 440 4 | 410 5 | 540 4 | 360 6 | 540 5 | 600 6 \ +380 5 | 470 5 | 500 3 | 210 2 | 240 5 | 330 4 | 290 4 | 430 3 | 250 4 | 430 5 | 450 5 \ +410 6 | 360 4 | 310 4 | 420 4 | 430 5 | 270 4 | 140 2 | 220 4 | 290 4 | 50 4 | 310 4 \ +230 4 | 180 4 | 130 2 | 120 2 | 100 1 | 80 2 | 160 4 | 200 2 | 100 2 | 110 2 | 190 4 | 0 + +110 3 | 180 4 | 100 5 | 210 6 | 500 9 | 570 8 | 590 9 | 610 10 | 700 10 | 700 11 \ +640 12 | 580 9 | 630 11 | 670 9 | 470 7 | 570 11 | 740 9 | 610 11 | 660 13 | 670 8 \ +500 8 | 550 7 | 540 10 | 490 7 | 640 8 | 550 9 | 570 7 | 480 7 | 480 9 | 630 7 | 460 10 \ +640 9 | 610 9 | 480 8 | 490 8 | 560 9 | 410 5 | 420 6 | 470 5 | 420 5 | 540 7 | 420 5 \ +540 7 | 480 5 | 530 7 | 520 7 | 390 8 | 480 4 | 320 3 | 390 6 | 290 4 | 240 3 | 260 5 \ +340 5 | 250 4 | 420 7 | 360 7 | 320 6 | 300 6 | 290 6 | 260 5 | 320 5 | 330 8 | 350 8 \ +120 2 | 190 5 | 0 + +130 2 | 380 5 | 220 5 | 410 5 | 510 7 | 530 5 | 610 5 | 720 8 | 540 5 | 600 5 | 510 7 \ +610 7 | 540 6 | 580 5 | 440 8 | 540 6 | 470 6 | 480 6 | 450 6 | 510 6 | 360 4 | 470 5 \ +380 4 | 350 4 | 440 5 | 500 5 | 440 4 | 410 4 | 420 5 | 590 6 | 350 5 | 530 4 | 460 4 \ +300 4 | 500 4 | 370 4 | 
190 2 | 240 4 | 490 4 | 390 4 | 520 5 | 490 4 | 500 4 | 570 6 \ +410 4 | 500 5 | 370 3 | 420 5 | 390 4 | 210 4 | 140 1 | 150 4 | 290 4 | 170 4 | 210 3 \ +310 4 | 120 1 | 250 4 | 140 3 | 130 3 | 110 2 | 200 3 | 210 4 | 270 3 | 130 4 | 180 3 | 0 diff --git a/src/openalea/sequence_analysis/data/pin_laricio_12.seq b/src/openalea/sequence_analysis/data/pin_laricio_12.seq new file mode 100644 index 0000000..786e54d --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_12.seq @@ -0,0 +1,361 @@ +# arbres de 12 ans, parcelle 607 + +# 5 VARIABLES + +# VARIABLE 1 : TIME +# VARIABLE 2 : INT longueur de la pousse +# VARIABLE 3 : INT nombre de branches par UC inter +# VARIABLE 4 : INT nombre de branches totale UC intra et inter +# VARIABLE 5 : INT nombre de cycles + +INDEX_PARAMETER : TIME + +4 VARIABLES + +VARIABLE 1 : INT # longueur de la pousse +VARIABLE 2 : INT # nombre de branches par UC inter +VARIABLE 3 : INT # nombre de branches totale UC intra et inter +VARIABLE 4 : INT # nombre de cycles + +1985 55 2 2 1 \ +1986 100 6 6 1 \ +1987 75 4 4 1 \ +1988 320 6 6 1 \ +1989 380 8 8 1 \ +1990 300 6 6 1 \ +1991 250 5 5 1 \ +1992 560 7 7 1 \ +1993 610 8 8 1 \ +1994 580 6 6 1 \ +1995 580 7 7 1 + +1985 30 2 2 1 \ +1986 45 2 2 1 \ +1987 30 3 3 1 \ +1988 130 3 3 1 \ +1989 235 4 4 1 \ +1990 130 6 6 1 \ +1991 85 6 6 1 \ +1992 235 5 5 1 \ +1993 270 5 5 1 \ +1994 320 5 5 1 \ +1995 430 6 6 1 + +1985 70 2 2 1 \ +1986 110 5 5 1 \ +1987 100 5 5 1 \ +1988 240 5 5 1 \ +1989 330 5 5 1 \ +1990 300 8 8 1 \ +1991 170 3 3 1 \ +1992 360 6 6 1 \ +1993 450 4 4 1 \ +1994 520 5 5 1 \ +1995 580 6 6 1 + +1985 90 1 1 1 \ +1986 75 6 6 1 \ +1987 175 5 5 1 \ +1988 390 5 5 1 \ +1989 350 5 5 1 \ +1990 300 4 4 1 \ +1991 270 7 7 1 \ +1992 420 5 5 1 \ +1993 540 5 5 1 \ +1994 420 3 3 1 \ +1995 490 7 7 1 + +1986 130 2 2 1 \ +1987 65 4 4 1 \ +1988 85 2 2 1 \ +1989 185 4 4 1 \ +1990 155 5 5 1 \ +1991 110 5 5 1 \ +1992 360 7 7 1 \ +1993 310 4 4 1 \ +1994 470 6 6 1 \ +1995 640 8 8 1 + +1985 50 4 4 1 \ +1986 
80 5 5 1 \ +1987 100 6 6 1 \ +1988 350 5 8 2 \ +1989 340 5 7 2 \ +1990 270 6 6 1 \ +1991 240 3 3 1 \ +1992 420 7 7 1 \ +1993 490 5 5 1 \ +1994 450 5 5 1 \ +1995 460 7 7 1 + +1985 60 1 1 1 \ +1986 110 3 3 1 \ +1987 75 5 5 1 \ +1988 215 5 5 1 \ +1989 250 6 6 1 \ +1990 170 6 6 1 \ +1991 130 4 4 1 \ +1992 260 6 6 1 \ +1993 240 5 5 1 \ +1994 360 6 6 1 \ +1995 520 7 7 1 + +1985 50 2 2 1 \ +1986 90 3 3 1 \ +1987 70 5 5 1 \ +1988 180 4 4 1 \ +1989 330 5 5 1 \ +1990 180 5 5 1 \ +1991 110 4 4 1 \ +1992 380 5 5 1 \ +1993 370 5 5 1 \ +1994 390 5 5 1 \ +1995 480 6 6 1 + +1986 75 1 1 1 \ +1987 145 2 2 1 \ +1988 180 2 2 1 \ +1989 340 5 10 2 \ +1990 250 5 5 1 \ +1991 240 4 4 1 \ +1992 260 5 5 1 \ +1993 290 5 5 1 \ +1994 510 7 7 1 \ +1995 580 7 7 1 + +1985 60 3 3 1 \ +1986 40 5 5 1 \ +1987 80 5 5 1 \ +1988 190 5 5 1 \ +1989 280 8 8 1 \ +1990 230 5 5 1 \ +1991 260 6 6 1 \ +1992 470 7 7 1 \ +1993 500 7 7 1 \ +1994 530 8 8 1 \ +1995 600 8 8 1 + +1985 60 1 1 1 \ +1986 60 3 3 1 \ +1987 60 3 3 1 \ +1988 190 4 4 1 \ +1989 240 5 5 1 \ +1990 170 5 5 1 \ +1991 70 2 2 1 \ +1992 160 5 5 1 \ +1993 180 4 4 1 \ +1994 330 5 5 1 \ +1995 440 7 7 1 + +1985 40 2 2 1 \ +1986 80 2 2 1 \ +1987 40 4 4 1 \ +1988 230 5 5 1 \ +1989 310 5 5 1 \ +1990 190 5 5 1 \ +1991 170 5 5 1 \ +1992 350 6 6 1 \ +1993 350 5 5 1 \ +1994 460 7 7 1 \ +1995 500 7 7 1 + +1985 30 1 1 1 \ +1986 100 1 1 1 \ +1987 45 3 3 1 \ +1988 125 3 3 1 \ +1989 230 5 5 1 \ +1990 170 5 5 1 \ +1991 160 4 4 1 \ +1992 380 5 5 1 \ +1993 380 5 5 1 \ +1994 400 5 5 1 \ +1995 470 6 6 1 + +1985 50 2 2 1 \ +1986 60 2 2 1 \ +1987 40 4 4 1 \ +1988 140 4 4 1 \ +1989 230 5 5 1 \ +1990 120 5 5 1 \ +1991 160 4 4 1 \ +1992 290 6 6 1 \ +1993 320 5 5 1 \ +1994 340 5 5 1 \ +1995 410 5 5 1 + +1985 40 1 1 1 \ +1986 130 5 5 1 \ +1987 130 6 6 1 \ +1988 290 6 6 1 \ +1989 440 6 6 1 \ +1990 360 4 4 1 \ +1991 360 6 6 1 \ +1992 510 6 6 1 \ +1993 460 8 8 1 \ +1994 490 8 8 1 \ +1995 590 6 6 1 + +1986 140 2 2 1 \ +1987 110 2 2 1 \ +1988 220 3 3 1 \ +1989 290 5 5 1 \ +1990 240 4 
4 1 \ +1991 240 3 3 1 \ +1992 460 6 6 1 \ +1993 460 5 5 1 \ +1994 500 5 5 1 \ +1995 510 5 5 1 + +1985 50 1 1 1 \ +1986 90 4 4 1 \ +1987 140 5 5 1 \ +1988 150 4 4 1 \ +1989 260 5 5 1 \ +1990 250 4 4 1 \ +1991 230 3 3 1 \ +1992 430 6 6 1 \ +1993 440 7 7 1 \ +1994 460 5 5 1 \ +1995 620 8 8 1 + +1985 70 3 3 1 \ +1986 70 5 5 1 \ +1987 130 4 4 1 \ +1988 210 4 4 1 \ +1989 180 3 3 1 \ +1990 260 4 4 1 \ +1991 240 6 6 1 \ +1992 430 6 6 1 \ +1993 480 6 6 1 \ +1994 440 7 7 1 \ +1995 560 5 5 1 + +# 1986 0 2 2 1 \ +1987 75 3 3 1 \ +1988 170 5 5 1 \ +1989 280 4 4 1 \ +1990 230 5 5 1 \ +1991 160 3 3 1 \ +1992 290 4 4 1 \ +1993 280 3 3 1 \ +1994 440 5 5 1 \ +1995 510 3 3 1 + +1985 90 4 4 1 \ +1986 80 4 4 1 \ +1987 80 3 3 1 \ +1988 250 9 14 2 \ +1989 270 7 7 1 \ +1990 320 7 7 1 \ +1991 300 6 6 1 \ +1992 500 8 8 1 \ +1993 500 6 6 1 \ +1994 470 8 8 1 \ +1995 520 7 7 1 + +1985 80 4 4 1 \ +1986 130 4 4 1 \ +1987 60 4 4 1 \ +1988 520 7 7 1 \ +1989 380 7 7 1 \ +1990 300 5 5 1 \ +1991 230 6 6 1 \ +1992 400 6 6 1 \ +1993 370 5 5 1 \ +1994 510 7 7 1 \ +1995 620 7 7 1 + +1985 30 1 1 1 \ +1986 70 3 3 1 \ +1987 70 5 5 1 \ +1988 210 5 5 1 \ +1989 310 8 8 1 \ +1990 270 5 5 1 \ +1991 320 6 6 1 \ +1992 290 6 6 1 \ +1993 230 6 6 1 \ +1994 400 7 7 1 \ +1995 580 7 7 1 + +1985 50 1 1 1 \ +1986 80 5 5 1 \ +1987 90 3 7 2 \ +1988 170 4 9 2 \ +1989 270 7 7 1 \ +1990 190 6 6 1 \ +1991 180 4 4 1 \ +1992 390 6 6 1 \ +1993 420 6 6 1 \ +1994 480 8 8 1 \ +1995 600 8 8 1 + +1985 60 2 2 1 \ +1986 70 3 3 1 \ +1987 90 5 5 1 \ +1988 190 5 5 1 \ +1989 270 8 8 1 \ +1990 290 5 5 1 \ +1991 340 6 6 1 \ +1992 510 8 8 1 \ +1993 520 8 8 1 \ +1994 580 8 8 1 \ +1995 630 6 6 1 + +1985 40 3 3 1 \ +1986 75 3 3 1 \ +1987 45 4 4 1 \ +1988 160 3 6 2 \ +1989 165 6 6 1 \ +1990 180 5 5 1 \ +1991 195 5 7 2 \ +1992 300 3 3 1 \ +1993 400 4 4 1 \ +1994 430 5 5 1 \ +1995 530 6 6 1 + +1985 40 2 2 1 \ +1986 60 4 4 1 \ +1987 70 6 6 1 \ +1988 250 6 6 1 \ +1989 250 8 8 1 \ +1990 240 5 5 1 \ +1991 240 5 5 1 \ +1992 460 7 7 1 \ +1993 520 8 8 1 \ 
+1994 490 8 8 1 \ +1995 630 7 7 1 + +# 1986 0 1 1 1 \ +1987 130 4 4 1 \ +1988 200 4 4 1 \ +1989 230 7 7 1 \ +1990 200 6 6 1 \ +1991 160 4 4 1 \ +1992 340 6 6 1 \ +1993 410 5 5 1 \ +1994 450 6 6 1 \ +1995 520 6 6 1 + +1985 50 2 2 1 \ +1986 80 7 7 1 \ +1987 70 5 5 1 \ +1988 250 7 7 1 \ +1989 280 6 6 1 \ +1990 250 5 5 1 \ +1991 210 5 5 1 \ +1992 350 7 7 1 \ +1993 270 3 3 1 \ +1994 400 7 7 1 \ +1995 620 5 5 1 + +1985 30 3 3 1 \ +1986 60 3 3 1 \ +1987 60 2 2 1 \ +1988 120 4 4 1 \ +1989 280 5 5 1 \ +1990 210 7 7 1 \ +1991 90 3 3 1 \ +1992 350 7 7 1 \ +1993 350 6 6 1 \ +1994 380 7 7 1 \ +1995 430 6 6 1 diff --git a/src/openalea/sequence_analysis/data/pin_laricio_18.seq b/src/openalea/sequence_analysis/data/pin_laricio_18.seq new file mode 100644 index 0000000..cb0ea1e --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_18.seq @@ -0,0 +1,545 @@ +# arbres de 18 ans, parcelle 600 + +# 5 VARIABLES + +# VARIABLE 1 : TIME +# VARIABLE 2 : VALUE longueur de la pousse +# VARIABLE 3 : VALUE nombre de branches par UC inter +# VARIABLE 4 : VALUE nombre de branches totale UC intra et inter +# VARIABLE 5 : VALUE nombre de cycles + +INDEX_PARAMETER : TIME + +4 VARIABLES + +VARIABLE 1 : INT # longueur de la pousse +VARIABLE 2 : INT # nombre de branches par UC inter +VARIABLE 3 : INT # nombre de branches totale UC intra et inter +VARIABLE 4 : INT # nombre de cycles + +1979 30 1 1 1 \ +1980 100 4 4 1 \ +1981 90 5 5 1 \ +1982 150 3 7 2 \ +1983 350 6 6 1 \ +1984 365 6 6 1 \ +1985 200 5 5 1 \ +1986 270 5 5 1 \ +1987 190 5 5 1 \ +1988 440 7 7 1 \ +1989 410 6 6 1 \ +1990 560 7 7 1 \ +1991 430 5 5 1 \ +1992 700 8 8 1 \ +1993 410 5 5 1 \ +1994 650 7 7 1 \ +1995 760 8 8 1 + +1980 100 1 1 1 \ +1981 70 5 5 1 \ +1982 100 2 2 1 \ +1983 110 3 3 1 \ +1984 200 7 7 1 \ +1985 450 6 6 1 \ +1986 270 3 5 2 \ +1987 140 5 5 1 \ +1988 380 6 6 1 \ +1989 500 8 8 1 \ +1990 410 5 5 1 \ +1991 260 5 5 1 \ +1992 450 7 7 1 \ +1993 290 5 5 1 \ +1994 530 7 7 1 \ +1995 640 6 6 1 + +1980 50 2 2 1 \ +1981 40 
3 3 1 \ +1982 200 5 5 1 \ +1983 395 8 8 1 \ +1984 360 6 6 1 \ +1985 345 7 7 1 \ +1986 360 6 6 1 \ +1987 310 6 6 1 \ +1988 540 8 8 1 \ +1989 510 11 11 1 \ +1990 340 5 5 1 \ +1991 410 7 7 1 \ +1992 680 8 8 1 \ +1993 600 8 14 2 \ +1994 430 5 5 1 \ +1995 750 8 13 2 + +1981 100 2 2 1 \ +1982 115 4 4 1 \ +1983 225 3 3 1 \ +1984 225 4 4 1 \ +1985 165 5 5 1 \ +1986 170 5 5 1 \ +1987 160 4 4 1 \ +1988 210 5 5 1 \ +1989 360 5 5 1 \ +1990 350 5 5 1 \ +1991 230 6 6 1 \ +1992 500 5 5 1 \ +1993 360 4 4 1 \ +1994 480 6 6 1 \ +1995 580 6 6 1 + +1979 70 1 1 1 \ +1980 70 3 3 1 \ +1981 70 3 3 1 \ +1982 110 8 8 1 \ +1983 170 4 4 1 \ +1984 100 3 3 1 \ +1985 100 4 4 1 \ +1986 120 3 3 1 \ +1987 70 4 4 1 \ +1988 70 7 7 1 \ +1989 90 5 5 1 \ +1990 90 3 3 1 \ +1991 40 4 4 1 \ +1992 140 5 5 1 \ +1993 150 4 4 1 \ +1994 310 6 6 1 \ +1995 380 5 5 1 + +1979 50 1 1 1 \ +1980 60 5 5 1 \ +1981 70 4 4 1 \ +1982 320 5 5 1 \ +1983 390 5 5 1 \ +1984 290 5 5 1 \ +1985 360 6 6 1 \ +1986 410 7 7 1 \ +1987 370 7 7 1 \ +1988 550 8 8 1 \ +1989 460 5 5 1 \ +1990 550 7 7 1 \ +1991 470 6 6 1 \ +1992 580 7 7 1 \ +1993 570 6 6 1 \ +1994 610 7 7 1 \ +1995 700 7 7 1 + +1980 80 1 1 1 \ +1981 60 1 1 1 \ +1982 130 2 2 1 \ +1983 340 8 8 1 \ +1984 260 6 6 1 \ +1985 210 6 6 1 \ +1986 240 3 3 1 \ +1987 220 4 4 1 \ +1988 440 7 7 1 \ +1989 440 7 7 1 \ +1990 420 6 6 1 \ +1991 290 4 4 1 \ +1992 620 8 8 1 \ +1993 460 6 6 1 \ +1994 460 7 7 1 \ +1995 590 8 8 1 + +1980 30 1 1 1 \ +1981 10 1 1 1 \ +1982 50 2 2 1 \ +1983 80 2 2 1 \ +1984 125 2 2 1 \ +1985 95 4 4 1 \ +1986 160 4 4 1 \ +1987 220 3 3 1 \ +1988 180 3 3 1 \ +1989 300 5 5 1 \ +1990 250 4 4 1 \ +1991 130 4 4 1 \ +1992 180 5 5 1 \ +1993 140 3 3 1 \ +1994 250 5 5 1 \ +1995 390 6 6 1 + +1980 90 1 1 1 \ +1981 60 2 2 1 \ +1982 90 1 1 1 \ +1983 320 8 8 1 \ +1984 180 5 5 1 \ +1985 100 5 5 1 \ +1986 130 5 5 1 \ +1987 210 7 7 1 \ +1988 330 8 8 1 \ +1989 370 7 7 1 \ +1990 440 6 6 1 \ +1991 290 6 6 1 \ +1992 530 8 8 1 \ +1993 370 7 7 1 \ +1994 460 9 9 1 \ +1995 550 8 8 1 + +1981 50 2 
2 1 \ +1982 75 2 2 1 \ +1983 195 3 3 1 \ +1984 240 4 4 1 \ +1985 95 3 3 1 \ +1986 105 5 5 1 \ +1987 145 3 3 1 \ +1988 180 5 5 1 \ +1989 185 6 6 1 \ +1990 225 4 4 1 \ +1991 150 4 4 1 \ +1992 270 5 5 1 \ +1993 210 4 4 1 \ +1994 310 6 6 1 \ +1995 400 4 4 1 + +1979 90 1 1 1 \ +1980 70 3 3 1 \ +1981 50 3 3 1 \ +1982 80 3 3 1 \ +1983 170 4 4 1 \ +1984 220 5 5 1 \ +1985 180 5 5 1 \ +1986 150 3 3 1 \ +1987 130 3 3 1 \ +1988 150 5 5 1 \ +1989 230 5 5 1 \ +1990 250 5 5 1 \ +1991 150 4 4 1 \ +1992 270 7 7 1 \ +1993 340 6 6 1 \ +1994 440 6 6 1 \ +1995 500 7 7 1 + +1979 45 1 1 1 \ +1980 110 3 3 1 \ +1981 70 4 4 1 \ +1982 155 3 3 1 \ +1983 315 4 4 1 \ +1984 240 5 5 1 \ +1985 180 5 5 1 \ +1986 150 3 3 1 \ +1987 85 3 3 1 \ +1988 165 5 5 1 \ +1989 250 5 5 1 \ +1990 260 5 5 1 \ +1991 140 3 3 1 \ +1992 270 5 5 1 \ +1993 300 4 4 1 \ +1994 390 6 6 1 \ +1995 500 6 6 1 + +1979 70 2 2 1 \ +1980 40 1 1 1 \ +1981 30 2 2 1 \ +1982 200 5 5 1 \ +1983 340 8 8 1 \ +1984 210 5 5 1 \ +1985 190 5 5 1 \ +1986 230 5 5 1 \ +1987 160 3 3 1 \ +1988 140 4 4 1 \ +1989 180 5 5 1 \ +1990 330 4 4 1 \ +1991 150 5 5 1 \ +1992 490 6 6 1 \ +1993 240 7 7 1 \ +1994 430 4 4 1 \ +1995 620 6 6 1 + +1979 80 4 4 1 \ +1980 70 3 3 1 \ +1981 90 2 8 2 \ +1982 140 4 4 1 \ +1983 240 5 5 1 \ +1984 310 5 5 1 \ +1985 330 8 8 1 \ +1986 250 6 6 1 \ +1987 220 6 6 1 \ +1988 300 6 6 1 \ +1989 300 6 6 1 \ +1990 330 5 5 1 \ +1991 240 5 5 1 \ +1992 430 5 5 1 \ +1993 400 4 4 1 \ +1994 310 8 8 1 \ +1995 620 6 6 1 + +1979 40 2 2 1 \ +1980 70 2 2 1 \ +1981 55 3 3 1 \ +1982 75 2 3 2 \ +1983 310 5 8 2 \ +1984 175 4 7 2 \ +1985 170 2 2 1 \ +1986 135 4 4 1 \ +1987 190 5 5 1 \ +1988 190 4 4 1 \ +1989 280 5 5 1 \ +1990 270 5 5 1 \ +1991 130 2 2 1 \ +1992 270 5 5 1 \ +1993 300 4 4 1 \ +1994 400 4 4 1 \ +1995 700 6 6 1 + +1979 90 1 1 1 \ +1980 50 1 1 1 \ +1981 50 1 1 1 \ +1982 50 2 2 1 \ +1983 170 2 2 1 \ +1984 220 4 4 1 \ +1985 160 1 1 1 \ +1986 130 1 1 1 \ +1987 130 4 4 1 \ +1988 150 4 4 1 \ +1989 195 5 5 1 \ +1990 215 4 4 1 \ +1991 110 3 3 1 \ 
+1992 340 5 5 1 \ +1993 250 4 4 1 \ +1994 360 5 5 1 \ +1995 500 6 6 1 + +1979 90 1 1 1 \ +1980 70 3 3 1 \ +1981 50 2 2 1 \ +1982 80 3 3 1 \ +1983 270 6 6 1 \ +1984 230 4 4 1 \ +1985 190 5 5 1 \ +1986 110 4 4 1 \ +1987 140 3 3 1 \ +1988 190 4 4 1 \ +1989 310 6 6 1 \ +1990 390 5 5 1 \ +1991 200 4 4 1 \ +1992 470 5 5 1 \ +1993 310 3 3 1 \ +1994 520 5 5 1 \ +1995 750 5 5 1 + +1979 70 3 3 1 \ +1980 70 4 4 1 \ +1981 80 3 3 1 \ +1982 170 4 4 1 \ +1983 460 6 6 1 \ +1984 350 6 6 1 \ +1985 250 3 3 1 \ +1986 180 4 4 1 \ +1987 300 6 6 1 \ +1988 460 6 6 1 \ +1989 540 7 7 1 \ +1990 540 5 5 1 \ +1991 390 5 5 1 \ +1992 620 8 8 1 \ +1993 530 5 5 1 \ +1994 590 7 7 1 \ +1995 740 7 7 1 + +1979 70 4 4 1 \ +1980 70 4 4 1 \ +1981 60 4 4 1 \ +1982 180 5 5 1 \ +1983 420 5 5 1 \ +1984 500 7 7 1 \ +1985 350 5 5 1 \ +1986 360 3 3 1 \ +1987 300 3 3 1 \ +1988 400 5 5 1 \ +1989 570 6 6 1 \ +1990 630 5 5 1 \ +1991 390 5 5 1 \ +1992 620 5 5 1 \ +1993 440 4 4 1 \ +1994 580 4 4 1 \ +1995 830 4 4 1 + +1980 100 2 2 1 \ +1981 80 6 6 1 \ +1982 180 4 4 1 \ +1983 380 6 6 1 \ +1984 310 6 6 1 \ +1985 200 5 5 1 \ +1986 160 5 5 1 \ +1987 220 5 5 1 \ +1988 430 7 7 1 \ +1989 480 8 8 1 \ +1990 440 5 5 1 \ +1991 250 5 5 1 \ +1992 540 8 8 1 \ +1993 450 5 5 1 \ +1994 640 4 4 1 \ +1995 720 7 7 1 + +1979 50 1 1 1 \ +1980 50 3 3 1 \ +1981 60 4 4 1 \ +1982 270 10 10 1 \ +1983 350 8 8 1 \ +1984 410 8 8 1 \ +1985 340 6 6 1 \ +1986 300 6 6 1 \ +1987 300 5 5 1 \ +1988 540 7 7 1 \ +1989 510 7 7 1 \ +1990 530 5 5 1 \ +1991 380 7 7 1 \ +1992 600 5 5 1 \ +1993 520 6 6 1 \ +1994 550 6 6 1 \ +1995 630 7 7 1 + +1979 20 2 2 1 \ +1980 70 2 2 1 \ +1981 60 3 3 1 \ +1982 230 5 5 1 \ +1983 400 5 5 1 \ +1984 400 5 5 1 \ +1985 350 5 5 1 \ +1986 360 5 5 1 \ +1987 330 5 5 1 \ +1988 570 8 8 1 \ +1989 610 7 7 1 \ +1990 460 5 5 1 \ +1991 290 5 5 1 \ +1992 620 5 5 1 \ +1993 560 4 4 1 \ +1994 580 6 6 1 \ +1995 720 5 5 1 + +1980 100 3 3 1 \ +1981 60 3 3 1 \ +1982 210 4 4 1 \ +1983 380 6 6 1 \ +1984 370 6 6 1 \ +1985 280 3 3 1 \ +1986 300 5 5 1 \ 
+1987 240 4 4 1 \ +1988 400 8 8 1 \ +1989 510 7 7 1 \ +1990 490 5 5 1 \ +1991 350 5 5 1 \ +1992 590 7 7 1 \ +1993 640 4 4 1 \ +1994 500 2 2 1 \ +1995 750 6 6 1 + +1980 130 3 3 1 \ +1981 100 6 6 1 \ +1982 240 5 5 1 \ +1983 330 9 9 1 \ +1984 400 7 7 1 \ +1985 340 7 7 1 \ +1986 260 5 5 1 \ +1987 350 6 6 1 \ +1988 500 8 8 1 \ +1989 530 7 7 1 \ +1990 390 4 4 1 \ +1991 440 5 5 1 \ +1992 640 8 8 1 \ +1993 450 8 8 1 \ +1994 490 7 7 1 \ +1995 470 5 5 1 + +1979 40 1 1 1 \ +1980 60 1 1 1 \ +1981 40 2 2 1 \ +1982 120 5 5 1 \ +1983 275 5 7 2 \ +1984 240 5 5 1 \ +1985 225 5 5 1 \ +1986 190 4 4 1 \ +1987 220 5 5 1 \ +1988 280 6 6 1 \ +1989 330 6 6 1 \ +1990 400 5 5 1 \ +1991 270 4 4 1 \ +1992 530 7 7 1 \ +1993 420 6 6 1 \ +1994 510 6 6 1 \ +1995 590 8 8 1 + +1979 60 1 1 1 \ +1980 90 4 4 1 \ +1981 110 5 5 1 \ +1982 360 6 6 1 \ +1983 400 9 9 1 \ +1984 500 10 10 1 \ +1985 410 7 7 1 \ +1986 290 7 7 1 \ +1987 260 6 6 1 \ +1988 580 10 10 1 \ +1989 660 10 10 1 \ +1990 510 7 7 1 \ +1991 480 8 8 1 \ +1992 720 10 10 1 \ +1993 570 8 8 1 \ +1994 620 6 6 1 \ +1995 730 7 7 1 + +1980 40 1 1 1 \ +1981 40 2 2 1 \ +1982 290 3 3 1 \ +1983 300 5 5 1 \ +1984 350 6 6 1 \ +1985 410 7 7 1 \ +1986 360 3 3 1 \ +1987 460 5 5 1 \ +1988 630 6 6 1 \ +1989 570 3 3 1 \ +1990 610 5 5 1 \ +1991 530 5 5 1 \ +1992 470 5 5 1 \ +1993 650 5 5 1 \ +1994 580 5 5 1 \ +1995 740 7 7 1 + +1979 40 1 1 1 \ +1980 30 2 2 1 \ +1981 80 2 2 1 \ +1982 280 10 10 1 \ +1983 250 6 6 1 \ +1984 290 6 6 1 \ +1985 230 5 5 1 \ +1986 220 4 4 1 \ +1987 190 3 3 1 \ +1988 320 8 8 1 \ +1989 480 6 6 1 \ +1990 430 6 6 1 \ +1991 210 5 5 1 \ +1992 470 4 4 1 \ +1993 430 4 4 1 \ +1994 510 4 4 1 \ +1995 620 5 5 1 + +1979 20 1 1 1 \ +1980 80 3 3 1 \ +1981 70 4 4 1 \ +1982 340 5 9 2 \ +1983 390 3 7 2 \ +1984 280 5 5 1 \ +1985 350 5 5 1 \ +1986 430 5 5 1 \ +1987 420 6 6 1 \ +1988 600 7 7 1 \ +1989 590 5 5 1 \ +1990 520 3 3 1 \ +1991 500 4 4 1 \ +1992 660 5 5 1 \ +1993 550 3 3 1 \ +1994 600 6 6 1 \ +1995 700 6 6 1 + +1979 120 4 4 1 \ +1980 120 4 4 1 \ 
+1981 80 4 4 1 \ +1982 450 5 8 2 \ +1983 450 3 5 2 \ +1984 340 5 5 1 \ +1985 470 8 8 1 \ +1986 420 4 4 1 \ +1987 400 4 4 1 \ +1988 710 6 6 1 \ +1989 650 5 5 1 \ +1990 430 5 5 1 \ +1991 390 6 6 1 \ +1992 630 7 7 1 \ +1993 475 5 5 1 \ +1994 565 7 7 1 \ +1995 690 7 7 1 diff --git a/src/openalea/sequence_analysis/data/pin_laricio_23.seq b/src/openalea/sequence_analysis/data/pin_laricio_23.seq new file mode 100644 index 0000000..4bcca67 --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_23.seq @@ -0,0 +1,302 @@ +# arbres de 23 ans, parcelle 528 + +# 5 VARIABLES + +# VARIABLE 1 : TIME +# VARIABLE 2 : VALUE longueur de la pousse +# VARIABLE 3 : VALUE nombre de branches par UC inter +# VARIABLE 4 : VALUE nombre de branches totale UC intra et inter +# VARIABLE 5 : VALUE nombre de cycles + +INDEX_PARAMETER : TIME + +4 VARIABLES + +VARIABLE 1 : INT # longueur de la pousse +VARIABLE 2 : INT # nombre de branches par UC inter +VARIABLE 3 : INT # nombre de branches totale UC intra et inter +VARIABLE 4 : INT # nombre de cycles + +1975 70 1 1 1 \ +1976 50 4 4 1 \ +1977 180 3 3 1 \ +1978 310 7 7 1 \ +1979 250 6 6 1 \ +1980 350 8 8 1 \ +1981 520 8 8 1 \ +1982 680 8 8 1 \ +1983 740 8 8 1 \ +1984 630 9 9 1 \ +1985 600 7 7 1 \ +1986 580 5 5 1 \ +1987 500 10 10 1 \ +1988 620 7 7 1 \ +1989 670 8 8 1 \ +1990 590 8 8 1 \ +1991 500 7 7 1 \ +1992 600 9 9 1 \ +1993 630 8 8 1 \ +1994 590 8 8 1 \ +1995 690 8 8 1 + +1975 110 1 1 1 \ +1976 40 1 1 1 \ +1977 230 7 7 1 \ +1978 400 5 5 1 \ +1979 270 4 4 1 \ +1980 210 5 5 1 \ +1981 390 6 6 1 \ +1982 510 7 7 1 \ +1983 670 5 5 1 \ +1984 650 8 8 1 \ +1985 470 5 5 1 \ +1986 410 4 4 1 \ +1987 290 3 3 1 \ +1988 740 7 7 1 \ +1989 610 5 5 1 \ +1990 500 6 6 1 \ +1991 650 5 5 1 \ +1992 780 7 7 1 \ +1993 730 6 6 1 \ +1994 790 6 6 1 \ +1995 800 8 8 1 + +1975 60 3 3 1 \ +1976 110 4 4 1 \ +1977 140 3 3 1 \ +1978 360 4 4 1 \ +1979 150 3 3 1 \ +1980 140 5 5 1 \ +1981 190 5 5 1 \ +1982 290 5 5 1 \ +1983 520 5 5 1 \ +1984 540 6 6 1 \ +1985 430 7 7 1 \ 
+1986 350 4 4 1 \ +1987 250 5 5 1 \ +1988 540 6 6 1 \ +1989 480 6 6 1 \ +1990 430 5 5 1 \ +1991 400 5 5 1 \ +1992 610 7 7 1 \ +1993 540 5 5 1 \ +1994 660 8 8 1 \ +1995 680 7 7 1 + +1976 140 3 3 1 \ +1977 190 4 4 1 \ +1978 330 7 7 1 \ +1979 300 6 6 1 \ +1980 360 6 6 1 \ +1981 540 8 8 1 \ +1982 730 8 8 1 \ +1983 610 7 7 1 \ +1984 630 8 8 1 \ +1985 470 7 7 1 \ +1986 370 3 3 1 \ +1987 490 5 5 1 \ +1988 740 8 8 1 \ +1989 620 7 7 1 \ +1990 650 7 7 1 \ +1991 480 6 6 1 \ +1992 610 7 7 1 \ +1993 610 6 6 1 \ +1994 630 7 7 1 \ +1995 710 7 7 1 + +1975 60 4 4 1 \ +1976 50 5 5 1 \ +1977 140 4 4 1 \ +1978 80 1 1 1 \ +1979 80 4 4 1 \ +1980 350 3 9 2 \ +1981 230 5 5 1 \ +1982 490 6 6 1 \ +1983 600 7 7 1 \ +1984 720 7 7 1 \ +1985 530 6 6 1 \ +1986 470 5 5 1 \ +1987 450 6 6 1 \ +1988 600 5 5 1 \ +1989 560 6 6 1 \ +1990 540 5 5 1 \ +1991 500 7 7 1 \ +1992 670 5 5 1 \ +1993 570 6 6 1 \ +1994 550 5 5 1 \ +1995 720 7 7 1 + +1976 140 1 1 1 \ +1977 80 1 1 1 \ +1978 320 4 4 1 \ +1979 340 5 5 1 \ +1980 350 6 6 1 \ +1981 590 7 7 1 \ +1982 590 6 6 1 \ +1983 660 7 7 1 \ +1984 660 7 7 1 \ +1985 530 5 5 1 \ +1986 560 5 5 1 \ +1987 470 7 7 1 \ +1988 810 5 5 1 \ +1989 680 6 6 1 \ +1990 540 6 6 1 \ +1991 540 6 6 1 \ +1992 620 8 8 1 \ +1993 610 6 6 1 \ +1994 640 7 7 1 \ +1995 680 8 8 1 + +1975 40 2 2 1 \ +1976 60 3 3 1 \ +1977 130 5 5 1 \ +1978 270 6 6 1 \ +1979 180 5 5 1 \ +1980 230 3 3 1 \ +1981 390 7 7 1 \ +1982 640 8 8 1 \ +1983 670 8 8 1 \ +1984 660 8 8 1 \ +1985 580 6 6 1 \ +1986 540 5 5 1 \ +1987 570 7 7 1 \ +1988 720 7 7 1 \ +1989 670 6 6 1 \ +1990 590 6 6 1 \ +1991 580 5 5 1 \ +1992 700 7 7 1 \ +1993 680 6 6 1 \ +1994 680 7 7 1 \ +1995 700 8 8 1 + +1975 120 4 4 1 \ +1976 50 4 4 1 \ +1977 250 4 7 2 \ +1978 290 4 4 1 \ +1979 290 5 5 1 \ +1980 470 5 5 1 \ +1981 570 5 5 1 \ +1982 640 6 6 1 \ +1983 760 6 6 1 \ +1984 730 6 6 1 \ +1985 630 6 6 1 \ +1986 620 6 6 1 \ +1987 610 5 5 1 \ +1988 760 8 8 1 \ +1989 680 6 6 1 \ +1990 690 8 8 1 \ +1991 540 6 6 1 \ +1992 600 6 6 1 \ +1993 540 6 6 1 \ +1994 630 
8 8 1 \ +1995 700 6 6 1 + +1975 60 2 2 1 \ +1976 70 3 3 1 \ +1977 210 5 5 1 \ +1978 340 5 5 1 \ +1979 220 5 5 1 \ +1980 270 6 6 1 \ +1981 360 6 6 1 \ +1982 600 5 5 1 \ +1983 700 7 7 1 \ +1984 650 6 6 1 \ +1985 430 5 5 1 \ +1986 390 3 3 1 \ +1987 280 5 5 1 \ +1988 700 8 8 1 \ +1989 600 5 5 1 \ +1990 390 3 3 1 \ +1991 410 3 3 1 \ +1992 500 5 5 1 \ +1993 600 5 5 1 \ +1994 570 4 4 1 \ +1995 780 5 5 1 + +1975 160 1 1 1 \ +1976 50 1 1 1 \ +1977 80 3 3 1 \ +1978 190 3 3 1 \ +1979 220 4 4 1 \ +1980 260 6 6 1 \ +1981 400 7 7 1 \ +1982 540 6 6 1 \ +1983 650 7 7 1 \ +1984 580 6 6 1 \ +1985 540 7 7 1 \ +1986 490 0 0 1 \ +1987 160 3 3 1 \ +1988 630 9 9 1 \ +1989 610 5 5 1 \ +1990 580 6 6 1 \ +1991 530 5 5 1 \ +1992 600 7 7 1 \ +1993 590 5 5 1 \ +1994 620 6 6 1 \ +1995 700 5 5 1 + +1975 60 5 5 1 \ +1976 70 6 6 1 \ +1977 130 4 4 1 \ +1978 240 4 4 1 \ +1979 340 8 8 1 \ +1980 370 6 6 1 \ +1981 560 8 8 1 \ +1982 570 2 2 1 \ +1983 610 5 5 1 \ +1984 500 7 7 1 \ +1985 450 5 5 1 \ +1986 620 6 6 1 \ +1987 530 5 5 1 \ +1988 850 6 19 2 \ +1989 620 7 7 1 \ +1990 680 7 7 1 \ +1991 630 7 7 1 \ +1992 720 8 8 1 \ +1993 690 6 6 1 \ +1994 730 7 7 1 \ +1995 800 8 8 1 + +1975 90 2 2 1 \ +1976 100 3 3 1 \ +1977 130 3 3 1 \ +1978 340 5 13 2 \ +1979 270 3 3 1 \ +1980 320 5 5 1 \ +1981 440 8 8 1 \ +1982 560 8 8 1 \ +1983 510 8 8 1 \ +1984 560 8 8 1 \ +1985 480 4 4 1 \ +1986 530 5 5 1 \ +1987 480 6 6 1 \ +1988 670 5 5 1 \ +1989 700 7 7 1 \ +1990 610 7 7 1 \ +1991 590 6 6 1 \ +1992 630 6 6 1 \ +1993 590 6 6 1 \ +1994 610 7 7 1 \ +1995 660 6 6 1 + +1975 50 2 2 1 \ +1976 70 2 2 1 \ +1977 120 3 3 1 \ +1978 130 2 2 1 \ +1979 90 4 4 1 \ +1980 180 5 5 1 \ +1981 240 5 5 1 \ +1982 430 7 7 1 \ +1983 570 7 7 1 \ +1984 580 7 7 1 \ +1985 520 6 6 1 \ +1986 500 3 3 1 \ +1987 500 6 6 1 \ +1988 740 6 6 1 \ +1989 570 6 6 1 \ +1990 580 5 5 1 \ +1991 580 6 6 1 \ +1992 630 6 6 1 \ +1993 590 6 6 1 \ +1994 700 7 7 1 \ +1995 410 6 6 1 diff --git a/src/openalea/sequence_analysis/data/pin_laricio_3_gaussian_multivariate.hsc 
b/src/openalea/sequence_analysis/data/pin_laricio_3_gaussian_multivariate.hsc new file mode 100644 index 0000000..c4797e0 --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_3_gaussian_multivariate.hsc @@ -0,0 +1,49 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +3 STATES + +INITIAL_PROBABILITIES +0.4 0.3 0.3 + +TRANSITION_PROBABILITIES +0.0 0.5 0.5 +0.0 0.0 1.0 +0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.1 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.1 + + +2 OUTPUT_PROCESSES + +OUTPUT_PROCESS 1 : CONTINUOUS_PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +GAUSSIAN MEAN : 5 STANDARD_DEVIATION : 5 +# GAUSSIAN MEAN : 5 STANDARD_DEVIATION : 10 + +STATE 1 OBSERVATION_DISTRIBUTION +GAUSSIAN MEAN : 20 STANDARD_DEVIATION : 10 +# GAUSSIAN MEAN : 30 STANDARD_DEVIATION : 10 + +STATE 2 OBSERVATION_DISTRIBUTION +GAUSSIAN MEAN : 60 STANDARD_DEVIATION : 10 +# GAUSSIAN MEAN : 60 STANDARD_DEVIATION : 10 + + +OUTPUT_PROCESS 2 : DISCRETE_PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +# BINOMIAL INF_BOUND : 0 SUP_BOUND : 15 PROBABILITY : 0.1 +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.5 + +STATE 1 OBSERVATION_DISTRIBUTION +# BINOMIAL INF_BOUND : 0 SUP_BOUND : 15 PROBABILITY : 0.5 +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +# BINOMIAL INF_BOUND : 0 SUP_BOUND : 15 PROBABILITY : 0.9 +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.1 diff --git a/src/openalea/sequence_analysis/data/pin_laricio_6.hsc b/src/openalea/sequence_analysis/data/pin_laricio_6.hsc new file mode 100644 index 0000000..ec6b650 --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_6.hsc @@ -0,0 +1,73 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +6 STATES + +INITIAL_PROBABILITIES +0.2 0.2 0.2 0.2 0.1 0.1 + +TRANSITION_PROBABILITIES +0.0 0.2 0.2 0.2 0.2 0.2 +0.0 0.0 0.3 0.3 0.2 0.2 +0.0 0.0 0.0 0.4 0.3 0.3 +0.0 
0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + + +2 OUTPUT_PROCESSES + +OUTPUT_PROCESS 1 : PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.02 + +STATE 2 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.015 + +STATE 3 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.02 + +STATE 4 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.03 + +STATE 5 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.05 + + +OUTPUT_PROCESS 2 : PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.3 + +STATE 1 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.15 + +STATE 3 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.2 + +STATE 4 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.25 + +STATE 5 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 1 PROBABILITY : 0.3 diff --git a/src/openalea/sequence_analysis/data/pin_laricio_6.seq 
b/src/openalea/sequence_analysis/data/pin_laricio_6.seq new file mode 100644 index 0000000..cb35848 --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_6.seq @@ -0,0 +1,235 @@ +# arbres de 6 ans, parcelle 60 + +# 5 VARIABLES + +# VARIABLE 1 : TIME +# VARIABLE 2 : VALUE longueur de la pousse +# VARIABLE 3 : VALUE nombre de branches par UC inter +# VARIABLE 4 : VALUE nombre de branches totale UC intra et inter +# VARIABLE 5 : VALUE nombre de cycles + +INDEX_PARAMETER : TIME + +4 VARIABLES + +VARIABLE 1 : INT # longueur de la pousse +VARIABLE 2 : INT # nombre de branches par UC inter +VARIABLE 3 : INT # nombre de branches totale UC intra et inter +VARIABLE 4 : INT # nombre de cycles + +1990 20 1 1 1 \ +1991 30 3 3 1 \ +1992 100 5 5 1 \ +1993 90 5 5 1 \ +1994 260 5 5 1 \ +1995 370 6 6 1 + +1990 5 2 2 1 \ +1991 60 3 3 1 \ +1992 45 5 5 1 \ +1993 125 5 5 1 \ +1994 320 5 5 1 \ +1995 325 6 6 1 + +1990 60 6 6 1 \ +1991 105 5 5 1 \ +1992 95 4 4 1 \ +1993 185 6 6 1 \ +1994 365 7 7 1 \ +1995 670 8 8 1 + +1990 35 1 1 1 \ +1991 60 3 3 1 \ +1992 110 5 5 1 \ +1993 180 9 9 1 \ +1994 335 5 5 1 \ +1995 560 8 8 1 + +1990 50 1 1 1 \ +1991 75 3 3 1 \ +1992 155 8 8 1 \ +1993 230 5 6 2 \ +1994 380 6 6 1 \ +1995 570 8 8 1 + +1990 10 8 8 1 \ +1991 40 4 4 1 \ +1992 40 3 3 1 \ +1993 205 4 4 1 \ +1994 145 3 3 1 \ +1995 170 4 4 1 + +1990 50 2 2 1 \ +1991 85 1 1 1 \ +1992 145 6 6 1 \ +1993 410 9 9 1 \ +1994 430 3 3 1 \ +1995 720 9 9 1 + +1990 5 3 3 1 \ +1991 25 7 7 1 \ +1992 45 6 6 1 \ +1993 85 1 1 1 \ +1994 315 3 11 2 \ +1995 145 5 5 1 + +1990 45 2 2 1 \ +1991 55 3 3 1 \ +1992 65 4 4 1 \ +1993 95 5 5 1 \ +1994 295 5 5 1 \ +1995 250 6 6 1 + +1990 10 2 2 1 \ +1991 90 3 3 1 \ +1992 65 4 4 1 \ +1993 65 4 4 1 \ +1994 315 5 5 1 \ +1995 425 5 5 1 + +1990 20 3 3 1 \ +1991 50 5 5 1 \ +1992 110 6 6 1 \ +1993 205 5 5 1 \ +1994 265 7 7 1 \ +1995 515 6 6 1 + +1990 10 1 1 1 \ +1991 70 2 4 2 \ +1992 135 5 5 1 \ +1993 240 4 4 1 \ +1994 230 4 9 2 \ +1995 425 6 6 1 + +1990 20 1 1 1 \ +1991 50 3 3 
1 \ +1992 125 4 4 1 \ +1993 275 5 5 1 \ +1994 415 5 5 1 \ +1995 525 5 5 1 + +1990 60 1 1 1 \ +1991 45 2 2 1 \ +1992 55 3 3 1 \ +1993 180 3 3 1 \ +1994 310 4 4 1 \ +1995 570 6 6 1 + +1990 10 3 3 1 \ +1991 70 4 4 1 \ +1992 80 7 7 1 \ +1993 245 5 5 1 \ +1994 405 6 6 1 \ +1995 485 8 8 1 + +1990 5 1 1 1 \ +1991 105 4 6 2 \ +1992 60 6 6 1 \ +1993 190 4 7 2 \ +1994 250 5 5 1 \ +1995 420 5 5 1 + +1990 45 1 1 1 \ +1991 95 3 3 1 \ +1992 150 7 7 1 \ +1993 260 5 5 1 \ +1994 305 6 6 1 \ +1995 495 8 8 1 + +1990 55 2 2 1 \ +1991 50 4 4 1 \ +1992 145 6 6 1 \ +1993 135 3 3 1 \ +1994 95 3 3 1 \ +1995 440 5 5 1 + +1990 75 2 2 1 \ +1991 65 3 3 1 \ +1992 120 6 6 1 \ +1993 380 6 10 2 \ +1994 365 5 5 1 \ +1995 575 8 8 1 + +1990 10 2 2 1 \ +1991 65 4 4 1 \ +1992 65 8 8 1 \ +1993 350 7 11 2 \ +1994 380 8 13 2 \ +1995 555 4 12 2 + +1990 20 2 2 1 \ +1991 75 5 5 1 \ +1992 90 6 6 1 \ +1993 310 6 6 1 \ +1994 535 7 10 2 \ +1995 515 6 6 1 + +1990 65 3 3 1 \ +1991 30 3 3 1 \ +1992 90 6 6 1 \ +1993 125 4 4 1 \ +1994 35 2 2 1 \ +1995 160 3 5 2 + +1990 25 1 1 1 \ +1991 65 3 3 1 \ +1992 100 6 6 1 \ +1993 300 4 4 1 \ +1994 260 5 5 1 \ +1995 610 8 8 1 + +1990 40 1 1 1 \ +1991 40 3 3 1 \ +1992 75 4 4 1 \ +1993 195 5 5 1 \ +1994 290 5 5 1 \ +1995 470 8 8 1 + +1990 15 2 2 1 \ +1991 95 7 7 1 \ +1992 110 5 5 1 \ +1993 260 6 6 1 \ +1994 450 6 6 1 \ +1995 590 8 8 1 + +1990 10 1 1 1 \ +1991 35 2 2 1 \ +1992 70 6 6 1 \ +1993 225 6 6 1 \ +1994 300 2 7 2 \ +1995 255 5 5 1 + +1990 10 1 1 1 \ +1991 90 5 5 1 \ +1992 110 6 6 1 \ +1993 220 6 6 1 \ +1994 220 6 6 1 \ +1995 390 8 8 1 + +1990 35 1 1 1 \ +1991 55 4 4 1 \ +1992 130 7 7 1 \ +1993 325 6 6 1 \ +1994 405 5 5 1 \ +1995 710 9 9 1 + +1990 10 3 3 1 \ +1991 55 3 3 1 \ +1992 85 4 4 1 \ +1993 190 4 8 2 \ +1994 345 6 6 1 \ +1995 435 6 6 1 + +1990 60 7 10 2 \ +1991 35 5 5 1 \ +1992 50 4 4 1 \ +1993 180 4 9 2 \ +1994 345 8 8 1 \ +1995 580 8 8 1 + +1990 10 1 1 1 \ +1991 85 3 3 1 \ +1992 125 6 6 1 \ +1993 270 6 6 1 \ +1994 310 6 6 1 \ +1995 590 7 7 1 diff --git 
a/src/openalea/sequence_analysis/data/pin_laricio_7x.seq b/src/openalea/sequence_analysis/data/pin_laricio_7x.seq new file mode 100644 index 0000000..959173c --- /dev/null +++ b/src/openalea/sequence_analysis/data/pin_laricio_7x.seq @@ -0,0 +1,72 @@ +# 3 VARIABLES + +# VARIABLE 1 : TIME +# VARIABLE 2 : VALUE longueur de l'UC +# VARIABLE 3 : VALUE nombre de branches par UC + +INDEX_PARAMETER : TIME + +2 VARIABLES + +VARIABLE 1 : INT # longueur de l'UC +VARIABLE 2 : INT # nombre de branches par UC + +1927 330 7 | 1928 290 8 | 1929 250 6 | 1930 320 5 | 1931 440 5 | 1932 440 7 | 1933 480 4 | 1934 470 6 \ +1935 570 5 | 1936 640 7 | 1937 700 6 | 1938 650 1 | 1939 590 5 | 1940 680 6 | 1941 640 5 | 1942 630 6 \ +1943 530 5 | 1944 540 5 | 1945 670 7 | 1946 750 7 | 1947 530 5 | 1948 820 6 | 1949 590 6 | 1950 650 5 \ +1951 650 6 | 1952 700 6 | 1953 710 5 | 1954 660 6 | 1955 650 5 | 1956 660 4 | 1957 620 4 | 1958 510 5 \ +1959 630 6 | 1960 760 4 | 1961 540 1 | 1962 360 4 | 1963 500 4 | 1964 460 5 | 1965 460 4 | 1966 520 4 \ +1967 610 5 | 1968 560 5 | 1969 550 5 | 1970 540 5 | 1971 560 3 | 1972 380 3 | 1973 350 3 | 1974 450 2 \ +1975 180 4 | 1976 470 5 | 1977 350 5 | 1978 520 2 | 1979 390 5 | 1980 420 5 | 1981 510 5 | 1982 240 0 \ +1983 250 8 | 1984 290 3 | 1985 280 2 | 1986 190 4 | 1987 230 3 | 1988 400 4 | 1989 380 5 | 1990 240 4 \ +1991 180 3 | 1992 210 4 # (1) + +1927 110 1 | 1928 210 3 | 1929 180 3 | 1930 170 4 | 1931 270 4 | 1932 360 6 | 1933 410 5 | 1934 390 6 \ +1935 410 6 | 1936 600 6 | 1937 590 5 | 1938 630 6 | 1939 640 7 | 1940 610 5 | 1941 600 8 | 1942 620 7 \ +1943 650 6 | 1944 680 7 | 1945 710 4 | 1946 830 7 | 1947 660 8 | 1948 870 7 | 1949 630 7 | 1950 520 6 \ +1951 610 6 | 1952 630 7 | 1953 790 5 | 1954 640 6 | 1955 640 6 | 1956 680 7 | 1957 560 5 | 1958 670 8 \ +1959 700 5 | 1960 580 5 | 1961 410 4 | 1962 550 5 | 1963 370 4 | 1964 380 5 | 1965 510 6 | 1966 360 6 \ +1967 610 6 | 1968 490 6 | 1969 470 5 | 1970 610 6 | 1971 450 5 | 1972 370 5 | 1973 380 6 | 1974 
400 4 \ +1975 410 4 | 1976 470 4 | 1977 470 5 | 1978 500 4 | 1979 450 5 | 1980 440 6 | 1981 450 5 | 1982 490 5 \ +1983 430 6 | 1984 350 5 | 1985 300 5 | 1986 130 3 | 1987 110 5 | 1988 210 4 | 1989 270 4 | 1990 200 3 \ +1991 90 3 | 1992 160 3 | 1993 110 4 | 1994 150 4 # (2) + +1927 160 4 | 1928 200 5 | 1929 260 3 | 1930 200 4 | 1931 220 4 | 1932 290 5 | 1933 340 5 | 1934 370 1 \ +1935 320 5 | 1936 370 4 | 1937 440 5 | 1938 580 7 | 1939 430 2 | 1940 500 5 | 1941 340 6 | 1942 340 5 \ +1943 440 4 | 1944 380 4 | 1945 320 5 | 1946 430 5 | 1947 350 5 | 1948 490 5 | 1949 520 6 | 1950 500 4 \ +1951 550 5 | 1952 660 5 | 1953 620 5 | 1954 800 5 | 1955 660 5 | 1956 670 6 | 1957 580 7 | 1958 450 6 \ +1959 390 5 | 1960 630 5 | 1961 550 7 | 1962 750 7 | 1963 530 6 | 1964 560 6 | 1965 640 6 | 1966 500 7 \ +1967 570 6 | 1968 540 6 | 1969 530 6 | 1970 640 5 | 1971 490 5 | 1972 640 5 | 1973 420 4 | 1974 520 4 \ +1975 530 6 | 1976 540 5 | 1977 380 3 | 1978 430 4 | 1979 490 6 | 1980 450 4 | 1981 500 7 | 1982 520 5 \ +1983 500 7 | 1984 510 6 | 1985 360 4 | 1986 190 5 | 1987 220 6 | 1988 240 4 | 1989 240 5 | 1990 210 0 \ +1991 130 2 | 1992 130 2 | 1993 110 3 | 1994 130 4 # (3) + +1927 210 4 | 1928 230 6 | 1929 360 5 | 1930 710 6 | 1931 830 6 | 1932 550 6 | 1933 670 6 | 1934 650 8 \ +1935 810 8 | 1936 780 7 | 1937 820 7 | 1938 900 8 | 1939 670 8 | 1940 710 8 | 1941 670 8 | 1942 820 8 \ +1943 820 6 | 1944 640 6 | 1945 570 7 | 1946 680 6 | 1947 520 6 | 1948 600 5 | 1949 530 6 | 1950 560 6 \ +1951 760 6 | 1952 680 8 | 1953 490 6 | 1954 440 4 | 1955 410 5 | 1956 540 4 | 1957 360 6 | 1958 540 5 \ +1959 600 6 | 1960 380 5 | 1961 470 5 | 1962 500 3 | 1963 210 2 | 1964 240 5 | 1965 330 4 | 1966 290 4 \ +1967 430 3 | 1968 250 4 | 1969 430 5 | 1970 450 5 | 1971 410 6 | 1972 360 4 | 1973 310 4 | 1974 420 4 \ +1975 430 5 | 1976 270 4 | 1977 140 2 | 1978 220 4 | 1979 290 4 | 1980 50 4 | 1981 310 4 | 1982 230 4 \ +1983 180 4 | 1984 130 2 | 1985 120 2 | 1986 100 1 | 1987 80 2 | 1988 160 4 | 1989 200 2 | 
1990 100 2 \ +1991 110 2 | 1992 190 4 | 1993 140 4 | 1994 260 3 # (4) + +1927 110 3 | 1928 180 4 | 1929 100 5 | 1930 210 6 | 1931 500 9 | 1932 570 8 | 1933 590 9 | 1934 610 10 \ +1935 700 10 | 1936 700 11 | 1937 640 12 | 1938 580 9 | 1939 630 11 | 1940 670 9 | 1941 470 7 | 1942 570 11 \ +1943 740 9 | 1944 610 11 | 1945 660 13 | 1946 670 8 | 1947 500 8 | 1948 550 7 | 1949 540 10 | 1950 490 7 \ +1951 640 8 | 1952 550 9 | 1953 570 7 | 1954 480 7 | 1955 480 9 | 1956 630 7 | 1957 460 10 | 1958 640 9 \ +1959 610 9 | 1960 480 8 | 1961 490 8 | 1962 560 9 | 1963 410 5 | 1964 420 6 | 1965 470 5 | 1966 420 5 \ +1967 540 7 | 1968 420 5 | 1969 540 7 | 1970 480 5 | 1971 530 7 | 1972 520 7 | 1973 390 8 | 1974 480 4 \ +1975 320 3 | 1976 390 6 | 1977 290 4 | 1978 240 3 | 1979 260 5 | 1980 340 5 | 1981 250 4 | 1982 420 7 \ +1983 360 7 | 1984 320 6 | 1985 300 6 | 1986 290 6 | 1987 260 5 | 1988 320 5 | 1989 330 8 | 1990 350 8 \ +1991 120 2 | 1992 190 5 | 1993 260 6 | 1994 270 5 # (5) + +1927 130 2 | 1928 380 5 | 1929 220 5 | 1930 410 5 | 1931 510 7 | 1932 530 5 | 1933 610 5 | 1934 720 8 \ +1935 540 5 | 1936 600 5 | 1937 510 7 | 1938 610 7 | 1939 540 6 | 1940 580 5 | 1941 440 8 | 1942 540 6 \ +1943 470 6 | 1944 480 6 | 1945 450 6 | 1946 510 6 | 1947 360 4 | 1948 470 5 | 1949 380 4 | 1950 350 4 \ +1951 440 5 | 1952 500 5 | 1953 440 4 | 1954 410 4 | 1955 420 5 | 1956 590 6 | 1957 350 5 | 1958 530 4 \ +1959 460 4 | 1960 300 4 | 1961 500 4 | 1962 370 4 | 1963 190 2 | 1964 240 4 | 1965 490 4 | 1966 390 4 \ +1967 520 5 | 1968 490 4 | 1969 500 4 | 1970 570 6 | 1971 410 4 | 1972 500 5 | 1973 370 3 | 1974 420 5 \ +1975 390 4 | 1976 210 4 | 1977 140 1 | 1978 150 4 | 1979 290 4 | 1980 170 4 | 1981 210 3 | 1982 310 4 \ +1983 120 1 | 1984 250 4 | 1985 140 3 | 1986 130 3 | 1987 110 2 | 1988 200 3 | 1989 210 4 | 1990 270 3 \ +1991 130 4 | 1992 180 3 | 1993 150 3 | 1994 280 3 # (6) diff --git a/src/openalea/sequence_analysis/data/reinet1.hsc b/src/openalea/sequence_analysis/data/reinet1.hsc new file 
mode 100644 index 0000000..a8f2e96 --- /dev/null +++ b/src/openalea/sequence_analysis/data/reinet1.hsc @@ -0,0 +1,78 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +7 STATES + +INITIAL_PROBABILITIES +0.5 0.5 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.3 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.1 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/reinet1.seq b/src/openalea/sequence_analysis/data/reinet1.seq new file mode 100644 index 0000000..d7824c2 --- /dev/null +++ 
b/src/openalea/sequence_analysis/data/reinet1.seq @@ -0,0 +1,66 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1168 +# mean: 1.24658 variance: 2.28191 standard deviation: 1.5106 + +# | value histogram +# 0 584 latent bud +# 1 176 short shoot +# 2 119 long shoot +# 3 114 fruiting shoot +# 4 175 immediate shoot + +# sequence length histogram - size of the sample: 16 +# mean: 73 variance: 14.2667 standard deviation: 3.77712 + +# cumulative length: 1168 + +0 0 0 2 0 1 0 0 3 0 3 3 3 0 3 3 1 3 3 3 3 0 1 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 \ +0 1 0 0 2 0 2 0 0 2 0 2 0 2 0 2 2 2 1 2 0 2 0 2 1 0 0 0 0 0 0 0 0 0 + +2 2 3 3 3 3 1 3 3 3 3 3 3 3 0 0 0 0 4 4 4 4 4 2 4 4 4 4 0 0 0 0 4 0 0 0 0 0 1 0 1 \ +1 1 2 1 0 2 1 1 1 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 + +0 2 2 2 1 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 \ +0 0 1 0 0 0 0 1 1 1 1 1 1 0 1 1 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 3 0 3 3 0 3 0 0 0 0 0 0 4 4 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 4 0 0 0 0 \ +0 1 0 1 0 2 2 1 1 0 2 2 2 1 2 2 2 0 0 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 0 + +0 0 2 0 0 1 0 0 0 0 3 4 4 0 0 2 0 0 0 0 0 3 3 2 4 4 4 4 4 4 4 4 4 4 2 4 4 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 + +0 0 0 2 0 0 0 0 3 3 3 3 3 3 3 0 3 0 3 3 3 3 4 2 4 4 4 4 2 4 4 4 0 0 0 0 0 0 0 0 0 \ +0 2 0 1 1 2 1 0 1 2 1 0 2 1 1 2 1 1 2 0 0 0 2 2 0 2 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 1 0 3 0 3 3 0 0 1 0 3 0 0 0 0 3 4 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 \ +0 0 0 0 0 0 1 0 0 2 0 0 1 1 1 0 1 1 1 1 0 1 2 0 0 2 0 0 0 0 0 0 0 0 0 + +0 0 2 0 0 0 0 3 0 0 3 0 3 1 0 3 0 3 0 0 3 0 4 4 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 \ +0 0 0 0 0 1 0 1 2 1 0 1 1 0 2 0 2 1 2 2 1 1 1 2 2 1 2 2 0 0 0 0 0 0 0 0 0 0 + +2 2 2 2 2 0 0 1 0 3 3 3 0 3 0 3 3 0 3 3 0 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 \ +0 0 1 2 0 1 0 2 0 0 1 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +2 2 2 1 1 3 3 0 3 0 0 3 0 0 3 0 0 3 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 1 0 0 \ +0 0 1 0 0 1 1 0 0 
1 2 1 0 0 1 1 1 0 1 0 1 2 0 0 0 0 0 0 0 + +0 0 2 0 2 0 3 3 3 0 3 3 0 0 3 4 4 4 4 4 4 4 4 4 4 4 4 0 4 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 1 0 1 1 1 2 1 2 0 2 1 1 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 2 0 3 3 0 3 3 3 3 3 3 3 0 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 0 0 0 0 0 0 0 0 \ +0 1 0 0 2 1 0 1 1 2 2 1 2 2 1 2 1 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 2 0 0 0 0 0 0 3 3 3 3 3 3 3 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4 4 0 4 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 1 0 1 1 0 2 1 1 1 1 1 1 1 2 1 0 2 0 0 0 0 0 0 0 + +2 2 2 3 0 3 3 3 0 3 0 3 3 0 0 3 0 4 4 4 4 4 4 4 4 4 4 0 4 0 0 0 0 0 0 0 0 0 0 0 1 \ +2 1 1 0 1 1 0 2 2 1 2 1 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 + +2 2 3 0 3 0 0 3 0 3 0 3 0 0 3 0 0 4 4 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 2 0 0 \ +0 0 2 1 1 1 1 1 0 0 1 2 2 0 1 0 1 2 0 2 0 0 0 0 0 0 0 + +0 0 0 2 2 2 1 0 0 3 0 3 3 3 3 3 3 3 2 4 4 4 4 4 4 4 4 4 4 0 4 0 0 0 0 0 0 0 0 0 1 \ +0 0 1 1 1 1 0 2 0 1 1 0 1 0 1 1 1 1 0 1 2 0 0 0 2 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/sequences1.seq b/src/openalea/sequence_analysis/data/sequences1.seq new file mode 100644 index 0000000..49376de --- /dev/null +++ b/src/openalea/sequence_analysis/data/sequences1.seq @@ -0,0 +1,7 @@ +1 VARIABLE + +VARIABLE 1 : STATE + +1 0 0 0 1 1 2 0 2 2 2 1 1 0 1 0 1 1 1 1 0 1 1 1 0 1 2 2 2 1 + +0 0 0 1 1 0 2 0 2 2 2 1 1 1 1 0 1 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/sequences2.seq b/src/openalea/sequence_analysis/data/sequences2.seq new file mode 100644 index 0000000..5040680 --- /dev/null +++ b/src/openalea/sequence_analysis/data/sequences2.seq @@ -0,0 +1,8 @@ +2 VARIABLES + +VARIABLE 1 : STATE +VARIABLE 2 : STATE + +1 0 | 0 0 | 1 0 | 2 0 | 2 1 | 2 1 | 1 0 | 1 0 | 1 0 | 0 1 | 0 1 | 1 1 | 0 1 | 2 0 | 2 1 + +0 0 | 0 0 | 1 0 | 2 0 | 2 1 | 1 1 | 1 0 | 1 0 | 0 0 | 0 0 diff --git a/src/openalea/sequence_analysis/data/sequences_tutorial.dat b/src/openalea/sequence_analysis/data/sequences_tutorial.dat new file mode 100644 index 0000000..1f2e2f8 --- /dev/null +++ 
b/src/openalea/sequence_analysis/data/sequences_tutorial.dat @@ -0,0 +1,12 @@ +1 VARIABLE + +VARIABLE 1 : VALUE + +0 0 0 0 0 3 3 0 3 3 0 0 4 1 4 4 0 0 0 2 1 0 3 0 0 0 0 2 0 3 3 0 1 4 3 0 0 0 0 0 0 0 0 0 4 0 4 +0 0 0 0 0 0 0 0 0 4 0 0 0 1 0 3 0 1 0 0 0 0 0 1 0 +0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 4 1 3 4 4 0 0 0 4 4 0 3 0 0 0 0 1 0 2 0 4 4 0 0 0 0 4 0 4 4 0 4 4 0 4 4 0 4 0 0 0 0 0 +0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 3 3 0 0 4 0 4 0 0 0 0 0 4 0 0 0 4 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 4 1 0 3 3 0 4 0 4 0 1 3 2 0 0 +0 0 0 0 0 0 0 0 4 4 0 4 0 3 0 0 0 4 0 0 0 0 0 4 0 1 0 0 0 0 0 0 2 1 1 4 2 0 0 0 0 0 0 0 4 4 0 0 4 0 4 0 0 0 0 4 4 +0 0 0 0 0 0 0 0 0 0 0 3 0 2 0 0 0 0 0 4 0 0 0 0 0 1 0 0 1 0 0 1 0 1 3 3 3 4 0 2 0 2 3 0 0 0 0 0 +0 0 3 0 0 0 +0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/switching_lmm_irred.hsc b/src/openalea/sequence_analysis/data/switching_lmm_irred.hsc new file mode 100644 index 0000000..29b2d97 --- /dev/null +++ b/src/openalea/sequence_analysis/data/switching_lmm_irred.hsc @@ -0,0 +1,34 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +3 STATES + +INITIAL_PROBABILITIES +1 0 0 + +TRANSITION_PROBABILITIES +0 0.8 0.2 +0.4 0 0.6 +0.8 0.2 0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 10 PROBABILITY : 0.8 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 5 PROBABILITY : 0.5 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 2 PROBABILITY : 0.2 + + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : CONTINUOUS_PARAMETRIC + +STATE 0 OBSERVATION_MODEL +LINEAR_MODEL INTERCEPT : 0 SLOPE : 0 STANDARD_DEVIATION : 1 + +STATE 1 OBSERVATION_MODEL +LINEAR_MODEL INTERCEPT : 3 SLOPE : 3 STANDARD_DEVIATION : 40 + +STATE 2 OBSERVATION_MODEL +LINEAR_MODEL INTERCEPT : 6 SLOPE : 6 STANDARD_DEVIATION : 400 diff --git a/src/openalea/sequence_analysis/data/test_align1.a b/src/openalea/sequence_analysis/data/test_align1.a new file mode 100644 index 0000000..347978e --- /dev/null +++ 
b/src/openalea/sequence_analysis/data/test_align1.a @@ -0,0 +1,11 @@ +1 VARIABLE + +VARIABLE 1 : SYMBOLIC + +5 SYMBOLS + +0 +1 0 +1 1 0 +1 1 1 0 +2 2 2 2 0 diff --git a/src/openalea/sequence_analysis/data/test_compound1.cd b/src/openalea/sequence_analysis/data/test_compound1.cd new file mode 100644 index 0000000..e05a45e --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_compound1.cd @@ -0,0 +1,7 @@ +COMPOUND_DISTRIBUTION + +SUM_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 2 PROBABILITY : 0.5 + +ELEMENTARY_DISTRIBUTION +BINOMIAL INF_BOUND : 2 SUP_BOUND : 5 PROBABILITY : 0.5 \ No newline at end of file diff --git a/src/openalea/sequence_analysis/data/test_convolution1.conv b/src/openalea/sequence_analysis/data/test_convolution1.conv new file mode 100644 index 0000000..8a90a82 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_convolution1.conv @@ -0,0 +1,7 @@ +CONVOLUTION 2 DISTRIBUTIONS + +DISTRIBUTION 1 +BINOMIAL INF_BOUND : 0 SUP_BOUND : 5 PROBABILITY : 0.5 + +DISTRIBUTION 2 +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 10 PROBABILITY : 0.3 diff --git a/src/openalea/sequence_analysis/data/test_hidden_markov.hmc b/src/openalea/sequence_analysis/data/test_hidden_markov.hmc new file mode 100644 index 0000000..f83b1d3 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_hidden_markov.hmc @@ -0,0 +1,109 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 + +TRANSITION_PROBABILITIES +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 + +2 OUTPUT_PROCESSES + +OUTPUT_PROCESS 1 : PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION 
+NEGATIVE_BINOMIAL INF_BOUND : 151 PARAMETER : 4.98621 PROBABILITY : 0.0253079 +# mean: 342.608 variance: 7407.65 standard deviation: 86.0677 + +# state 0 observation histogram - sample size: 16 +# mean: 343 variance: 8093.6 standard deviation: 89.9644 + +STATE 1 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 14 PARAMETER : 5.51595 PROBABILITY : 0.128946 +# mean: 51.2609 variance: 288.921 standard deviation: 16.9977 + +# state 1 observation histogram - sample size: 450 +# mean: 52.6467 variance: 283.013 standard deviation: 16.823 + +STATE 2 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 15.2244 PROBABILITY : 0.296427 +# mean: 36.0955 variance: 120.061 standard deviation: 10.9573 + +# state 2 observation histogram - sample size: 616 +# mean: 37.1331 variance: 103.859 standard deviation: 10.1911 + +STATE 3 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 7.57415 PROBABILITY : 0.313207 +# mean: 16.5791 variance: 52.0408 standard deviation: 7.21393 + +# state 3 observation histogram - sample size: 76 +# mean: 15.3289 variance: 33.8504 standard deviation: 5.81811 + +STATE 4 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 0.417911 PROBABILITY : 0.287116 +# mean: 2.02391 variance: 3.3744 standard deviation: 1.83696 + +# state 4 observation histogram - sample size: 135 +# mean: 1.56296 variance: 1.21802 standard deviation: 1.10364 + +STATE 5 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 17.6102 PROBABILITY : 0.392841 +# mean: 27.1879 variance: 68.2611 standard deviation: 8.26203 + +# state 5 observation histogram - sample size: 841 +# mean: 27.0226 variance: 56.234 standard deviation: 7.49893 + +STATE 6 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 9.43365 PROBABILITY : 0.409779 +# mean: 13.5656 variance: 32.5876 standard deviation: 5.70855 + +# state 6 observation histogram - sample size: 674 +# mean: 12.997 variance: 25.8425 standard deviation: 
5.08355 + +STATE 7 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1.81053 PROBABILITY : 0.331155 +# mean: 4.63929 variance: 10.6861 standard deviation: 3.26895 + +# state 7 observation histogram - sample size: 164 +# mean: 4.4939 variance: 9.20855 standard deviation: 3.03456 + +OUTPUT_PROCESS 2 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.944093 +OUTPUT 1 : 0.055907 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.998208 +OUTPUT 1 : 0.001792 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.97642 +OUTPUT 1 : 0.02358 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 diff --git a/src/openalea/sequence_analysis/data/test_hidden_markov_non-parametric1.hmc b/src/openalea/sequence_analysis/data/test_hidden_markov_non-parametric1.hmc new file mode 100644 index 0000000..5c02a32 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_hidden_markov_non-parametric1.hmc @@ -0,0 +1,111 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 + +TRANSITION_PROBABILITIES +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 +0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125 + +2 OUTPUT_PROCESSES + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.944093 +OUTPUT 1 : 0.055907 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 
: 0.998208 +OUTPUT 1 : 0.001792 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.97642 +OUTPUT 1 : 0.02358 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.99999 +OUTPUT 1 : 1e-05 + +OUTPUT_PROCESS 2 : PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 151 PARAMETER : 4.98621 PROBABILITY : 0.0253079 +# mean: 342.608 variance: 7407.65 standard deviation: 86.0677 + +# state 0 observation histogram - sample size: 16 +# mean: 343 variance: 8093.6 standard deviation: 89.9644 + +STATE 1 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 14 PARAMETER : 5.51595 PROBABILITY : 0.128946 +# mean: 51.2609 variance: 288.921 standard deviation: 16.9977 + +# state 1 observation histogram - sample size: 450 +# mean: 52.6467 variance: 283.013 standard deviation: 16.823 + +STATE 2 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 15.2244 PROBABILITY : 0.296427 +# mean: 36.0955 variance: 120.061 standard deviation: 10.9573 + +# state 2 observation histogram - sample size: 616 +# mean: 37.1331 variance: 103.859 standard deviation: 10.1911 + +STATE 3 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 7.57415 PROBABILITY : 0.313207 +# mean: 16.5791 variance: 52.0408 standard deviation: 7.21393 + +# state 3 observation histogram - sample size: 76 +# mean: 15.3289 variance: 33.8504 standard deviation: 5.81811 + +STATE 4 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 0.417911 PROBABILITY : 0.287116 +# mean: 2.02391 variance: 3.3744 standard deviation: 1.83696 + +# state 4 observation histogram - sample size: 135 +# mean: 1.56296 variance: 1.21802 standard deviation: 1.10364 + +STATE 5 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 17.6102 PROBABILITY : 
0.392841 +# mean: 27.1879 variance: 68.2611 standard deviation: 8.26203 + +# state 5 observation histogram - sample size: 841 +# mean: 27.0226 variance: 56.234 standard deviation: 7.49893 + +STATE 6 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 0 PARAMETER : 9.43365 PROBABILITY : 0.409779 +# mean: 13.5656 variance: 32.5876 standard deviation: 5.70855 + +# state 6 observation histogram - sample size: 674 +# mean: 12.997 variance: 25.8425 standard deviation: 5.08355 + +STATE 7 OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1.81053 PROBABILITY : 0.331155 +# mean: 4.63929 variance: 10.6861 standard deviation: 3.26895 + +# state 7 observation histogram - sample size: 164 +# mean: 4.4939 variance: 9.20855 standard deviation: 3.03456 + + diff --git a/src/openalea/sequence_analysis/data/test_hidden_semi_markov.dat b/src/openalea/sequence_analysis/data/test_hidden_semi_markov.dat new file mode 100644 index 0000000..a83e13c --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_hidden_semi_markov.dat @@ -0,0 +1,89 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.4 0.3 0.3 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION 
+NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 6 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.1 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.3 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.1 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.3 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.1 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/test_hidden_semi_markov_param.dat b/src/openalea/sequence_analysis/data/test_hidden_semi_markov_param.dat new file mode 100644 index 0000000..127f49f --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_hidden_semi_markov_param.dat @@ -0,0 +1,33 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +3 STATES + +INITIAL_PROBABILITIES +1 0 0 + +TRANSITION_PROBABILITIES +0.0 0.8 0.2 +0.2 0.0 0.8 +0.8 0.2 0.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +BINOMIAL INF_BOUND : 10 SUP_BOUND : 20 PROBABILITY : 0.85 + +STATE 1 OCCUPANCY_DISTRIBUTION +POISSON INF_BOUND : 15 PARAMETER : 25 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 20 PARAMETER : 10 PROBABILITY : 0.5 + +1 OUTPUT_PROCESS + +OUTPUT_PROCESS 1 : PARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +BINOMIAL INF_BOUND : 45 SUP_BOUND : 75 PROBABILITY : 0.25 + +STATE 1 OBSERVATION_DISTRIBUTION +BINOMIAL INF_BOUND : 75 SUP_BOUND : 100 PROBABILITY : 0.85 + +STATE 2 
OBSERVATION_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 3 PROBABILITY : 0.2 diff --git a/src/openalea/sequence_analysis/data/test_nonhomogeneous.dat b/src/openalea/sequence_analysis/data/test_nonhomogeneous.dat new file mode 100644 index 0000000..2a3365b --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_nonhomogeneous.dat @@ -0,0 +1,18 @@ +NONHOMOGENEOUS_MARKOV_CHAIN + +2 STATES + +INITIAL_PROBABILITIES +0.97973 0.0202703 + +TRANSITION_PROBABILITIES +0.916226 0.0837743 +0.82235 0.17765 + + +STATE 0 NONHOMOGENEOUS +MONOMOLECULAR FUNCTION PARAMETER 1 : 0.99999 PARAMETER 2 : -0.200483 PARAMETER 3 : 0.0715162 + +STATE 1 HOMOGENEOUS + + diff --git a/src/openalea/sequence_analysis/data/test_param1.p b/src/openalea/sequence_analysis/data/test_param1.p new file mode 100644 index 0000000..a852b74 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_param1.p @@ -0,0 +1,5 @@ +TOP_PARAMETERS + +PROBABILITY : 0.6 +AXILLARY_PROBABILITY : 0.6 +RHYTHM_RATIO : 1.2 diff --git a/src/openalea/sequence_analysis/data/test_semi_markov.dat b/src/openalea/sequence_analysis/data/test_semi_markov.dat new file mode 100644 index 0000000..3310b24 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_semi_markov.dat @@ -0,0 +1,21 @@ +SEMI-MARKOV_CHAIN + +4 STATES + +INITIAL_PROBABILITIES +0.8 0.2 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.6 0.4 0.0 +0.0 0.0 0.7 0.3 +0.0 0.2 0.0 0.8 +0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 2 PARAMETER : 3.2 PROBABILITY : 0.4 + +STATE 1 OCCUPANCY_DISTRIBUTION +BINOMIAL INF_BOUND : 1 SUP_BOUND : 12 PROBABILITY : 0.6 + +STATE 2 OCCUPANCY_DISTRIBUTION +POISSON INF_BOUND : 1 PARAMETER : 5.4 diff --git a/src/openalea/sequence_analysis/data/test_time_events.dat b/src/openalea/sequence_analysis/data/test_time_events.dat new file mode 100644 index 0000000..417c39c --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_time_events.dat @@ -0,0 +1,8 @@ +20 2 1 +20 3 2 +20 4 4 +20 5 12 +20 
6 14 +20 7 6 +20 8 2 +20 9 1 diff --git a/src/openalea/sequence_analysis/data/test_top_parameters.dat b/src/openalea/sequence_analysis/data/test_top_parameters.dat new file mode 100644 index 0000000..e942dbb --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_top_parameters.dat @@ -0,0 +1,5 @@ +TOP_PARAMETERS + +PROBABILITY : 0.7 +AXILLARY_PROBABILITY : 0.6 +RHYTHM_RATIO : 0.8 diff --git a/src/openalea/sequence_analysis/data/test_tops1.dat b/src/openalea/sequence_analysis/data/test_tops1.dat new file mode 100644 index 0000000..7664b23 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_tops1.dat @@ -0,0 +1,9 @@ +2 VARIABLES + +VARIABLE 1 : POSITION +VARIABLE 2 : NB_INTERNODE + +10 5 | 12 5 | 13 6 | 13 8 | 15 7 | 20 10 | 22 11 | 23 11 | 27 15 | 30 16 | 31 15 | 32 17 \ +35 16 | 37 18 | 40 19 | 45 + +5 2 | 7 4 | 10 5 | 11 6 | 15 7 | 18 8 | 20 9 | 21 11 | 22 11 | 25 12 | 25 diff --git a/src/openalea/sequence_analysis/data/test_variable_order_markov.dat b/src/openalea/sequence_analysis/data/test_variable_order_markov.dat new file mode 100644 index 0000000..9643314 --- /dev/null +++ b/src/openalea/sequence_analysis/data/test_variable_order_markov.dat @@ -0,0 +1,36 @@ +MARKOV_CHAIN + +5 STATES + +INITIAL_PROBABILITIES +0.6 0.0666666 0.266667 0 0.0666667 + +TRANSITION_PROBABILITIES # memory +# 0.777778 0.111111 0.111111 0 0 0 non-terminal +0.871269 0.031716 0.057835 0.007462 0.031718 0 0 # terminal +0.405405 0.297297 0.216216 0.081082 0 1 0 # terminal +0.622642 0.169811 0.188679 0 0.018868 2 0 # terminal +0.217391 0.217391 0.043478 0.391304 0.130436 3 0 # terminal +0.55814 0 0.046511 0.023255 0.372094 4 0 # terminal +0.24183 0.457516 0.084967 0.169935 0.045752 1 # terminal +0.5 0.150943 0.301887 0.037735 0.009435 2 # terminal +0.261364 0.170455 0.022727 0.386364 0.15909 3 # terminal +0.153571 0.028571 0.007142 0.025 0.785716 4 # terminal + +# recurrent class: states 0 1 2 3 4 + +# memory tree + +# |___0___0 0 +# | |___1 0 +# | |___2 0 +# | |___3 0 +# | 
|___4 0 +# |___1 +# |___2 +# |___3 +# |___4 + +# 0 1 2 3 4 +# 0 0 0 1 0 2 0 3 0 4 0 + diff --git a/src/openalea/sequence_analysis/data/vanille_m.seq b/src/openalea/sequence_analysis/data/vanille_m.seq new file mode 100644 index 0000000..a50958c --- /dev/null +++ b/src/openalea/sequence_analysis/data/vanille_m.seq @@ -0,0 +1,362 @@ +1 VARIABLE + +VARIABLE 1 : INT # 2 values + +# value histogram - size of the sample : 3899 +# mean : 0.0897666 variance : 0.0817295 standard deviation : 0.285884 + +# | value histogram +# 0 3549 +# 1 350 + +# value 0 recurrence time histogram - size of the sample : 3401 +# mean : 1.10144 variance : 0.132942 standard deviation : 0.364612 + +# value 1 recurrence time histogram - size of the sample : 202 +# mean : 3.32178 variance : 10.1298 standard deviation : 3.18273 + +# value 0 sojourn time histogram - size of the sample : 285 +# mean : 4.48772 variance : 16.9902 standard deviation : 4.12191 + +# final run - value 0 sojourn time histogram - size of the sample : 147 +# mean : 15.4422 variance : 145.057 standard deviation : 12.0439 + +# value 1 sojourn time histogram - size of the sample : 287 +# mean : 1.21254 variance : 0.237884 standard deviation : 0.487734 + +# final run - value 1 sojourn time histogram - size of the sample : 1 +# mean : 2 variance : 0 standard deviation : 0 + +# number of runs of value 0 per sequence histogram - size of the sample : 148 +# mean : 2.91892 variance : 1.70767 standard deviation : 1.30678 + +# number of runs of value 1 per sequence histogram - size of the sample : 148 +# mean : 1.94595 variance : 1.72495 standard deviation : 1.31337 + +# number of occurences of value 0 per sequence histogram - size of the sample : 148 +# mean : 23.9797 variance : 223.612 standard deviation : 14.9537 + +# number of occurences of value 1 per sequence histogram - size of the sample : 148 +# mean : 2.36486 variance : 3.10406 standard deviation : 1.76184 + +# sequence length histogram - size of the sample : 148 +# mean : 
26.3446 variance : 245.125 standard deviation : 15.6565 + +# length cumul : 3899 + +0 0 0 0 1 0 1 0 1 0 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 \ +0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 + +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 + +0 0 0 1 1 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 0 0 + +0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 1 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 + +0 1 0 0 0 0 1 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 0 0 0 + +0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 + +0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 1 0 0 0 0 0 + +0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 1 0 0 0 0 0 0 0 0 0 + +0 1 0 1 0 1 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 1 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 + +0 1 0 0 0 0 0 0 0 + +0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 + 
+0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 1 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 1 0 0 0 0 + +0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 0 0 0 0 0 + +0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 + +0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 + +0 0 0 1 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 + +0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 + +0 0 0 0 0 0 1 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 + +0 1 0 1 0 0 0 1 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 1 0 0 + +0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 + +0 0 0 0 1 1 0 0 0 0 0 + +0 0 0 1 0 1 0 1 0 0 0 0 0 + +0 0 1 0 0 0 1 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 + +0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 1 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 1 1 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 \ +0 0 0 0 0 0 + +1 0 0 0 0 0 1 0 0 0 0 0 0 
1 0 1 0 0 0 + +0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 + +0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 + +0 0 0 0 1 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 + +0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/src/openalea/sequence_analysis/data/well_log_filtered.seq b/src/openalea/sequence_analysis/data/well_log_filtered.seq new file mode 100644 index 0000000..1c6d813 --- /dev/null +++ b/src/openalea/sequence_analysis/data/well_log_filtered.seq @@ -0,0 +1,4060 @@ +# Well-log data; used in Fearnhead and Clifford "On-line Inference for +# Hidden Markov Models via Particle Filters". Measurements of Nuclear-response +# of a well-bore over time. Data from O Ruanaidh, J. J. K. and +# Fitzgerald, W. J. (1996). "Numerical Bayesion Methods Applied to Signal +# Processing". New York: Springer. + +1 VARIABLE + +VARIABLE 1 : REAL + +# 1.3353060e+05 \ +# 1.3711910e+05 \ +# 1.3382050e+05 \ +# 1.3601430e+05 \ +# 1.3375700e+05 \ +# 1.2756550e+05 \ +# 1.2141570e+05 \ +1.1457840e+05 \ +1.0495290e+05 \ +# 1.0271870e+05 \ +# 9.6389020e+04 \ +# 9.1232100e+04 \ +# 9.9749550e+04 \ +# 9.4807340e+04 \ +# 9.3409290e+04 \ +# 9.7200520e+04 \ +# 9.7894880e+04 \ +# 1.0306940e+05 \ +# 1.0219520e+05 \ +1.0978810e+05 \ +1.0769660e+05 \ +1.0977970e+05 \ +1.1177950e+05 \ +1.1052790e+05 \ +1.1023730e+05 \ +1.0885180e+05 \ +1.1256800e+05 \ +1.1499570e+05 \ +# 1.2071490e+05 \ +1.1305060e+05 \ +1.1286510e+05 \ +1.1178960e+05 \ +1.0816080e+05 \ +1.0845440e+05 \ +1.1224750e+05 \ +1.0889830e+05 \ +1.0910570e+05 \ +1.0685390e+05 \ +1.0814610e+05 \ +1.0946570e+05 \ +1.1186480e+05 \ +1.1305060e+05 \ +1.1392470e+05 \ +1.1018690e+05 \ +1.0953450e+05 \ +1.0843670e+05 \ +1.1119330e+05 \ +1.0985310e+05 \ +1.0886350e+05 \ +1.1123210e+05 \ +1.1187640e+05 \ +1.1295450e+05 \ +1.0981610e+05 \ +1.1107040e+05 \ +1.1148400e+05 \ 
+1.1331190e+05 \ +1.1480290e+05 \ +1.1275970e+05 \ +1.0923310e+05 \ +1.1015830e+05 \ +1.0909400e+05 \ +1.0934200e+05 \ +1.1278370e+05 \ +1.1303060e+05 \ +1.0917270e+05 \ +# 9.8990280e+04 \ +1.0990190e+05 \ +1.0859220e+05 \ +1.1257760e+05 \ +1.1357400e+05 \ +1.1392210e+05 \ +1.0961630e+05 \ +1.1388310e+05 \ +1.1253850e+05 \ +1.1090080e+05 \ +1.1146170e+05 \ +1.1202550e+05 \ +1.0834310e+05 \ +1.0888840e+05 \ +1.1433830e+05 \ +1.1550990e+05 \ +1.1111100e+05 \ +1.1075010e+05 \ +1.1387040e+05 \ +1.1503880e+05 \ +1.1141660e+05 \ +1.1592440e+05 \ +1.1153290e+05 \ +1.1309230e+05 \ +1.1549920e+05 \ +1.1059940e+05 \ +1.0884960e+05 \ +1.1210620e+05 \ +1.1318300e+05 \ +1.1439480e+05 \ +1.1268920e+05 \ +1.1108560e+05 \ +1.1295220e+05 \ +1.1371610e+05 \ +1.1208660e+05 \ +1.1448910e+05 \ +1.1338870e+05 \ +1.1114440e+05 \ +1.1261850e+05 \ +1.1325050e+05 \ +1.0787920e+05 \ +1.0873260e+05 \ +1.1172400e+05 \ +1.1145260e+05 \ +1.1626730e+05 \ +1.1013850e+05 \ +1.1111010e+05 \ +1.0778010e+05 \ +1.1322600e+05 \ +1.1140350e+05 \ +1.1306240e+05 \ +1.1218770e+05 \ +1.1437020e+05 \ +1.1230760e+05 \ +1.0848920e+05 \ +1.1604440e+05 \ +1.1512160e+05 \ +1.1615560e+05 \ +1.0959830e+05 \ +1.1415880e+05 \ +# 1.0724130e+05 \ +1.1287050e+05 \ +1.1596510e+05 \ +1.1439170e+05 \ +1.1016710e+05 \ +1.1083070e+05 \ +1.1570850e+05 \ +1.1293990e+05 \ +1.1384630e+05 \ +1.1556770e+05 \ +1.1376510e+05 \ +1.1480290e+05 \ +1.0908840e+05 \ +1.1122260e+05 \ +1.1453440e+05 \ +1.1271860e+05 \ +1.1240810e+05 \ +1.1647750e+05 \ +1.1507660e+05 \ +1.1073030e+05 \ +1.1057800e+05 \ +1.1368350e+05 \ +1.1148400e+05 \ +1.1342320e+05 \ +1.0891380e+05 \ +1.1556770e+05 \ +1.1236680e+05 \ +1.1453920e+05 \ +1.1370360e+05 \ +1.0882820e+05 \ +1.1703450e+05 \ +1.1257850e+05 \ +1.1208630e+05 \ +# 1.1913280e+05 \ +1.1470190e+05 \ +# 1.0749850e+05 \ +1.1583660e+05 \ +1.1228760e+05 \ +1.1606710e+05 \ +1.1099060e+05 \ +1.0938380e+05 \ +1.1329240e+05 \ +1.0838540e+05 \ +1.1428640e+05 \ +1.1354170e+05 \ +1.1285970e+05 \ +1.1011880e+05 \ 
+1.1372370e+05 \ +# 1.0563570e+05 \ +1.0983590e+05 \ +1.0933490e+05 \ +1.0910210e+05 \ +1.1523320e+05 \ +1.1071040e+05 \ +1.1271090e+05 \ +1.1387440e+05 \ +1.1387440e+05 \ +1.1036630e+05 \ +1.1355400e+05 \ +1.1355400e+05 \ +1.1500160e+05 \ +1.1109180e+05 \ +# 1.0753590e+05 \ +# 1.0663850e+05 \ +1.1431310e+05 \ +1.1354280e+05 \ +1.1201540e+05 \ +1.1272920e+05 \ +1.1372370e+05 \ +1.1151320e+05 \ +1.1182950e+05 \ +1.1282920e+05 \ +1.1468170e+05 \ +1.1309060e+05 \ +1.1213730e+05 \ +1.1297990e+05 \ +1.1004960e+05 \ +1.1325230e+05 \ +1.1683440e+05 \ +1.1665180e+05 \ +1.0987390e+05 \ +1.1336300e+05 \ +1.0998790e+05 \ +1.1231650e+05 \ +1.0855240e+05 \ +1.1228680e+05 \ +1.1072620e+05 \ +# 1.1840130e+05 \ +1.1415290e+05 \ +1.0963770e+05 \ +1.1218590e+05 \ +1.0925620e+05 \ +1.1405250e+05 \ +1.1070000e+05 \ +1.1308910e+05 \ +1.1001880e+05 \ +1.1367230e+05 \ +1.1129240e+05 \ +1.1337760e+05 \ +1.1277980e+05 \ +1.1363030e+05 \ +1.1375390e+05 \ +1.1391590e+05 \ +1.1311060e+05 \ +1.1537620e+05 \ +1.1004740e+05 \ +1.1235720e+05 \ +1.1384690e+05 \ +1.1005130e+05 \ +1.1175000e+05 \ +1.1537430e+05 \ +1.1206510e+05 \ +1.1035050e+05 \ +1.0888840e+05 \ +1.1181500e+05 \ +1.1485280e+05 \ +1.1296140e+05 \ +1.1551710e+05 \ +1.1372370e+05 \ +1.1421250e+05 \ +1.0995570e+05 \ +1.1552720e+05 \ +1.1145210e+05 \ +1.1151410e+05 \ +1.1380670e+05 \ +1.1380670e+05 \ +1.1063500e+05 \ +1.1400510e+05 \ +1.1301050e+05 \ +1.1055980e+05 \ +1.1015610e+05 \ +1.1314160e+05 \ +1.1529530e+05 \ +1.1031980e+05 \ +1.1337210e+05 \ +1.0894350e+05 \ +1.1131120e+05 \ +1.1594680e+05 \ +1.1472210e+05 \ +1.1590630e+05 \ +1.1631220e+05 \ +1.1270420e+05 \ +1.1520510e+05 \ +1.1110430e+05 \ +1.1260930e+05 \ +1.1413630e+05 \ +1.1667210e+05 \ +1.1314160e+05 \ +1.1551470e+05 \ +1.1174830e+05 \ +1.1339770e+05 \ +1.0821700e+05 \ +1.1595960e+05 \ +1.1405520e+05 \ +1.1232690e+05 \ +1.1209640e+05 \ +1.0922500e+05 \ +1.1362330e+05 \ +1.1171870e+05 \ +1.1098010e+05 \ +1.1220580e+05 \ +1.1421930e+05 \ +1.1541680e+05 \ +1.0937550e+05 \ 
+1.1080110e+05 \ +1.1117310e+05 \ +1.1177510e+05 \ +1.1094240e+05 \ +1.1154360e+05 \ +1.0895520e+05 \ +1.1236680e+05 \ +1.1124240e+05 \ +1.1198580e+05 \ +1.1217660e+05 \ +1.1056880e+05 \ +1.1154360e+05 \ +1.1503610e+05 \ +1.1288120e+05 \ +1.1357640e+05 \ +1.1267330e+05 \ +1.1432130e+05 \ +1.0698500e+05 \ +1.1339210e+05 \ +1.1258800e+05 \ +1.1170560e+05 \ +1.1513090e+05 \ +1.0781210e+05 \ +1.1203530e+05 \ +1.1339770e+05 \ +1.1258290e+05 \ +1.1578330e+05 \ +1.1430970e+05 \ +1.1542930e+05 \ +1.1176380e+05 \ +1.1125200e+05 \ +1.1195580e+05 \ +1.1354500e+05 \ +1.1013850e+05 \ +1.1142310e+05 \ +1.1123160e+05 \ +1.1050430e+05 \ +1.0803630e+05 \ +1.0923310e+05 \ +1.0991490e+05 \ +1.1372220e+05 \ +1.1300130e+05 \ +1.1109180e+05 \ +1.1223740e+05 \ +1.0971340e+05 \ +1.0662540e+05 \ +1.0784010e+05 \ +1.1108470e+05 \ +1.1151360e+05 \ +1.1186480e+05 \ +1.1185480e+05 \ +1.0955420e+05 \ +1.0839430e+05 \ +1.1174380e+05 \ +1.1174980e+05 \ +1.1146830e+05 \ +1.0969810e+05 \ +1.1082380e+05 \ +1.0976710e+05 \ +1.1408680e+05 \ +1.1213670e+05 \ +1.1595770e+05 \ +1.1337700e+05 \ +1.1165400e+05 \ +1.0792940e+05 \ +1.0942070e+05 \ +# 9.4972870e+04 \ +# 9.8664480e+04 \ +# 9.9972630e+04 \ +# 1.0507330e+05 \ +# 1.0507330e+05 \ +1.1064970e+05 \ +1.1029610e+05 \ +1.0838230e+05 \ +1.0838230e+05 \ +1.0911370e+05 \ +1.1307060e+05 \ +1.0767370e+05 \ +1.1204540e+05 \ +1.1117190e+05 \ +1.0703030e+05 \ +1.1391590e+05 \ +1.1244990e+05 \ +1.1097080e+05 \ +# 1.0520090e+05 \ +1.1236660e+05 \ +1.0910940e+05 \ +1.0955420e+05 \ +1.1087250e+05 \ +1.0956550e+05 \ +1.1254810e+05 \ +1.0947400e+05 \ +1.1196570e+05 \ +1.1392470e+05 \ +1.0797620e+05 \ +1.1175420e+05 \ +1.0787410e+05 \ +1.1119230e+05 \ +1.1209680e+05 \ +# 1.1723820e+05 \ +1.0796250e+05 \ +1.0796250e+05 \ +1.1030930e+05 \ +1.1242780e+05 \ +1.1075160e+05 \ +1.1311170e+05 \ +1.1355750e+05 \ +1.1199000e+05 \ +1.1026060e+05 \ +1.0652110e+05 \ +1.1337210e+05 \ +1.1258890e+05 \ +1.1329240e+05 \ +1.1260800e+05 \ +1.1309980e+05 \ +1.1196570e+05 \ 
+1.1115130e+05 \ +1.1178520e+05 \ +1.0740110e+05 \ +1.1111100e+05 \ +1.0935590e+05 \ +1.0935590e+05 \ +1.1098950e+05 \ +1.0863450e+05 \ +1.1248810e+05 \ +1.1236760e+05 \ +1.0806500e+05 \ +1.1101670e+05 \ +1.0714230e+05 \ +1.1049910e+05 \ +1.1477700e+05 \ +1.0869340e+05 \ +1.0842150e+05 \ +1.0722100e+05 \ +1.0836270e+05 \ +1.0836270e+05 \ +1.0974440e+05 \ +1.1107130e+05 \ +1.1215630e+05 \ +1.1479090e+05 \ +1.1317210e+05 \ +1.1230700e+05 \ +1.1069850e+05 \ +1.0956550e+05 \ +1.1068510e+05 \ +1.1446900e+05 \ +1.1131160e+05 \ +1.1163430e+05 \ +1.0918920e+05 \ +1.1197570e+05 \ +1.0816360e+05 \ +1.0902940e+05 \ +1.0961200e+05 \ +1.0980490e+05 \ +1.1086110e+05 \ +# 1.0612350e+05 \ +1.1360320e+05 \ +1.1360320e+05 \ +1.1085050e+05 \ +1.1485080e+05 \ +1.1266950e+05 \ +1.1370480e+05 \ +1.1095090e+05 \ +1.1076430e+05 \ +1.1239390e+05 \ +1.1327960e+05 \ +1.1197560e+05 \ +1.1165440e+05 \ +1.1194590e+05 \ +1.1314120e+05 \ +1.1196040e+05 \ +1.0907240e+05 \ +1.1489120e+05 \ +1.1425770e+05 \ +1.1275040e+05 \ +1.1144240e+05 \ +1.1378050e+05 \ +1.1321040e+05 \ +1.1204540e+05 \ +1.1007570e+05 \ +1.1003480e+05 \ +1.1148290e+05 \ +1.1165420e+05 \ +1.1359890e+05 \ +1.1276920e+05 \ +1.1331750e+05 \ +1.1417950e+05 \ +1.1271970e+05 \ +1.0901340e+05 \ +1.0941490e+05 \ +1.1511990e+05 \ +1.1244260e+05 \ +1.1250260e+05 \ +1.0852080e+05 \ +1.0980490e+05 \ +1.1402950e+05 \ +1.1168420e+05 \ +1.1480910e+05 \ +1.1278920e+05 \ +1.1662110e+05 \ +1.0832650e+05 \ +1.1438640e+05 \ +1.1514440e+05 \ +1.1285640e+05 \ +1.1558410e+05 \ +1.1106090e+05 \ +1.1383170e+05 \ +1.1222640e+05 \ +1.1354500e+05 \ +1.0940350e+05 \ +1.0940350e+05 \ +1.0882430e+05 \ +1.0882430e+05 \ +1.1585160e+05 \ +1.1585160e+05 \ +1.1248810e+05 \ +1.1304130e+05 \ +1.1191470e+05 \ +1.1323040e+05 \ +# 1.1786630e+05 \ +1.0830390e+05 \ +1.1107200e+05 \ +1.1525340e+05 \ +1.1413570e+05 \ +1.0962480e+05 \ +1.1102510e+05 \ +1.1240810e+05 \ +# 1.0520170e+05 \ +1.1665480e+05 \ +1.1357640e+05 \ +1.1015850e+05 \ +1.1279860e+05 \ +1.0965980e+05 \ 
+1.1348480e+05 \ +1.1165410e+05 \ +1.1334290e+05 \ +1.1277040e+05 \ +1.0955420e+05 \ +1.1189460e+05 \ +1.0852850e+05 \ +1.0994340e+05 \ +1.0813640e+05 \ +1.1211680e+05 \ +# 1.1862150e+05 \ +1.1627150e+05 \ +1.1455930e+05 \ +1.1123090e+05 \ +1.1461820e+05 \ +1.1294520e+05 \ +1.1350660e+05 \ +1.1073950e+05 \ +1.1321220e+05 \ +1.1578090e+05 \ +1.0911420e+05 \ +1.1220090e+05 \ +1.1135650e+05 \ +# 1.1836780e+05 \ +1.1114040e+05 \ +1.0975560e+05 \ +1.1372490e+05 \ +1.1174380e+05 \ +1.1610320e+05 \ +1.1200580e+05 \ +1.1349370e+05 \ +1.1369850e+05 \ +1.0843800e+05 \ +1.0870670e+05 \ +1.1233110e+05 \ +# 1.1900170e+05 \ +1.0977530e+05 \ +# 1.1856000e+05 \ +1.1122130e+05 \ +1.1237930e+05 \ +1.1463840e+05 \ +1.1115130e+05 \ +1.1255450e+05 \ +1.1225190e+05 \ +1.1109120e+05 \ +1.0885180e+05 \ +1.0862810e+05 \ +# 1.0723450e+05 \ +# 1.0723450e+05 \ +1.1411850e+05 \ +1.1161310e+05 \ +1.1188470e+05 \ +1.1179500e+05 \ +1.1179500e+05 \ +1.1200580e+05 \ +1.1443210e+05 \ +1.1307220e+05 \ +1.1450100e+05 \ +1.1147210e+05 \ +1.1653500e+05 \ +1.0931190e+05 \ +1.1735550e+05 \ +1.1530020e+05 \ +1.1490010e+05 \ +1.1512900e+05 \ +1.1338310e+05 \ +1.1268900e+05 \ +1.1507660e+05 \ +1.1321400e+05 \ +1.1348480e+05 \ +1.1438840e+05 \ +1.1412710e+05 \ +1.1529990e+05 \ +1.1211730e+05 \ +1.1612560e+05 \ +1.1197570e+05 \ +1.1520210e+05 \ +1.1218590e+05 \ +1.1273040e+05 \ +1.0889630e+05 \ +# 1.1958050e+05 \ +# 1.0803710e+05 \ +1.1188930e+05 \ +# 1.1826630e+05 \ +1.1321770e+05 \ +1.1511060e+05 \ +1.1358230e+05 \ +1.1343430e+05 \ +1.1364690e+05 \ +1.1175380e+05 \ +1.1372740e+05 \ +1.1230700e+05 \ +# 1.0755540e+05 \ +1.1484750e+05 \ +1.1111500e+05 \ +1.1783950e+05 \ +# 1.0753590e+05 \ +1.1524010e+05 \ +1.1562990e+05 \ +1.1634510e+05 \ +1.1235800e+05 \ +1.1329240e+05 \ +1.1329440e+05 \ +1.1082650e+05 \ +1.1025290e+05 \ +1.1628250e+05 \ +1.1090080e+05 \ +1.1751320e+05 \ +1.1251960e+05 \ +1.1251960e+05 \ +1.1561470e+05 \ +1.1561470e+05 \ +1.1211680e+05 \ +1.1211680e+05 \ +1.0950640e+05 \ +1.1266890e+05 \ 
+1.1797630e+05 \ +1.1345440e+05 \ +1.1277630e+05 \ +1.1329190e+05 \ +1.1250800e+05 \ +1.1526580e+05 \ +1.1245850e+05 \ +1.1484760e+05 \ +1.1330340e+05 \ +1.1188470e+05 \ +1.1362390e+05 \ +1.1042710e+05 \ +1.1394750e+05 \ +1.1308310e+05 \ +1.1199560e+05 \ +1.1559700e+05 \ +1.1271030e+05 \ +1.1516460e+05 \ +1.1171850e+05 \ +1.1069850e+05 \ +1.1387850e+05 \ +1.1283980e+05 \ +1.1456950e+05 \ +1.0961340e+05 \ +1.1332290e+05 \ +1.1785870e+05 \ +1.1238860e+05 \ +1.1345440e+05 \ +1.1087540e+05 \ +1.1206020e+05 \ +1.1374760e+05 \ +1.1421930e+05 \ +1.1169420e+05 \ +1.1520510e+05 \ +1.1520510e+05 \ +1.1427270e+05 \ +1.1593930e+05 \ +1.1736210e+05 \ +1.1380670e+05 \ +1.1727380e+05 \ +1.1723150e+05 \ +1.1313240e+05 \ +1.1431310e+05 \ +1.1603700e+05 \ +1.1684770e+05 \ +1.1592440e+05 \ +1.1648350e+05 \ +1.1531870e+05 \ +1.1440360e+05 \ +1.1557150e+05 \ +1.1445230e+05 \ +1.1155280e+05 \ +1.1375630e+05 \ +# 1.2023730e+05 \ +1.1608850e+05 \ +1.1466980e+05 \ +1.1543700e+05 \ +1.1668990e+05 \ +1.1087970e+05 \ +1.1110060e+05 \ +1.1162290e+05 \ +1.1269970e+05 \ +1.1142640e+05 \ +1.1622660e+05 \ +1.1292590e+05 \ +1.1069850e+05 \ +1.1570850e+05 \ +1.1386040e+05 \ +1.1326330e+05 \ +1.1151370e+05 \ +1.1313520e+05 \ +1.1448100e+05 \ +1.1448100e+05 \ +1.1542780e+05 \ +1.1343660e+05 \ +1.1271030e+05 \ +1.1313340e+05 \ +# 1.0178280e+05 \ +# 9.7932140e+04 \ +# 9.8081630e+04 \ +# 1.0554250e+05 \ +1.1131160e+05 \ +1.1236600e+05 \ +1.1158940e+05 \ +1.0946570e+05 \ +1.1566020e+05 \ +1.1302510e+05 \ +1.1344120e+05 \ +1.1043970e+05 \ +1.1495980e+05 \ +1.0862450e+05 \ +1.0985560e+05 \ +1.1246780e+05 \ +1.1417050e+05 \ +1.0841390e+05 \ +1.1200990e+05 \ +1.1190450e+05 \ +1.0970490e+05 \ +1.0981610e+05 \ +1.1065740e+05 \ +1.1585680e+05 \ +1.1019780e+05 \ +1.1421250e+05 \ +1.1015140e+05 \ +1.1349730e+05 \ +1.1242260e+05 \ +1.0971610e+05 \ +1.0781190e+05 \ +1.1227730e+05 \ +1.1120270e+05 \ +1.0867920e+05 \ +1.1143220e+05 \ +1.1073180e+05 \ +1.1053700e+05 \ +1.1033570e+05 \ +1.0864610e+05 \ +1.0826350e+05 \ 
+1.1104100e+05 \ +1.1087170e+05 \ +1.0794300e+05 \ +1.0801020e+05 \ +1.1052790e+05 \ +1.1560470e+05 \ +1.1169420e+05 \ +1.1319210e+05 \ +1.1293060e+05 \ +1.1150780e+05 \ +1.1269970e+05 \ +1.1285160e+05 \ +1.0708980e+05 \ +1.1087170e+05 \ +1.1317350e+05 \ +1.1442030e+05 \ +1.1535930e+05 \ +1.1535930e+05 \ +1.1323780e+05 \ +1.0824670e+05 \ +1.0757670e+05 \ +1.0824390e+05 \ +1.1068020e+05 \ +1.0803710e+05 \ +1.0648800e+05 \ +1.1211640e+05 \ +1.0903380e+05 \ +1.0902210e+05 \ +1.1292380e+05 \ +1.0835820e+05 \ +1.1295220e+05 \ +1.0933880e+05 \ +1.1402520e+05 \ +1.0970920e+05 \ +1.1408610e+05 \ +1.1312150e+05 \ +1.1116030e+05 \ +1.1539650e+05 \ +1.1380670e+05 \ +1.1592390e+05 \ +1.0853280e+05 \ +1.1143190e+05 \ +1.1258940e+05 \ +1.1357400e+05 \ +1.1305220e+05 \ +1.1505410e+05 \ +1.0973590e+05 \ +1.1037160e+05 \ +1.1054350e+05 \ +1.1337400e+05 \ +1.1491620e+05 \ +1.1007570e+05 \ +1.1223680e+05 \ +1.1152340e+05 \ +1.1452120e+05 \ +1.1168420e+05 \ +1.1567790e+05 \ +1.1032860e+05 \ +1.1117600e+05 \ +1.0976410e+05 \ +1.1674930e+05 \ +1.1356510e+05 \ +1.1327430e+05 \ +1.0951180e+05 \ +1.1250800e+05 \ +1.1113000e+05 \ +1.1291050e+05 \ +1.1545860e+05 \ +1.1448910e+05 \ +1.1153290e+05 \ +1.1243850e+05 \ +1.1448440e+05 \ +1.1342320e+05 \ +1.1470190e+05 \ +1.1373620e+05 \ +1.1146230e+05 \ +1.1157470e+05 \ +1.1312700e+05 \ +1.1533580e+05 \ +1.1168400e+05 \ +1.1114650e+05 \ +1.1282500e+05 \ +1.1363570e+05 \ +1.1477460e+05 \ +1.1113000e+05 \ +1.1406830e+05 \ +1.1305060e+05 \ +1.1081880e+05 \ +1.1361550e+05 \ +1.1501450e+05 \ +1.1501450e+05 \ +1.1494480e+05 \ +1.1250800e+05 \ +1.1181500e+05 \ +1.1413860e+05 \ +1.1446450e+05 \ +1.1462730e+05 \ +1.1205740e+05 \ +1.1104100e+05 \ +1.1388580e+05 \ +1.1577340e+05 \ +1.1541680e+05 \ +1.1329240e+05 \ +1.1617150e+05 \ +1.1266950e+05 \ +1.1306300e+05 \ +1.1293990e+05 \ +1.1054770e+05 \ +1.1265070e+05 \ +1.1655920e+05 \ +1.1516460e+05 \ +1.1382950e+05 \ +1.1182500e+05 \ +1.1189920e+05 \ +1.1330340e+05 \ +1.1312250e+05 \ +1.1030690e+05 \ 
+1.1277100e+05 \ +1.1162450e+05 \ +1.1265350e+05 \ +1.1392060e+05 \ +1.1330100e+05 \ +1.1095920e+05 \ +1.1635290e+05 \ +1.1319360e+05 \ +1.1262940e+05 \ +1.1621930e+05 \ +1.1049740e+05 \ +1.1515110e+05 \ +1.1417210e+05 \ +1.1660000e+05 \ +1.1315200e+05 \ +1.1260000e+05 \ +1.1540900e+05 \ +1.1517690e+05 \ +1.1113120e+05 \ +1.1205590e+05 \ +# 1.1848510e+05 \ +1.1726550e+05 \ +1.1828090e+05 \ +1.1439520e+05 \ +1.1256940e+05 \ +1.1167840e+05 \ +1.1304670e+05 \ +1.1304670e+05 \ +1.1024320e+05 \ +1.1619180e+05 \ +1.1285050e+05 \ +1.1014730e+05 \ +1.1467600e+05 \ +# 1.1935550e+05 \ +1.1467600e+05 \ +1.1482720e+05 \ +1.1232760e+05 \ +1.1348710e+05 \ +1.1643360e+05 \ +1.1286720e+05 \ +1.1467600e+05 \ +1.1397690e+05 \ +1.1252990e+05 \ +1.1611050e+05 \ +# 1.0870100e+05 \ +1.1459510e+05 \ +1.1104100e+05 \ +1.1101890e+05 \ +1.1294130e+05 \ +1.1849980e+05 \ +1.1624700e+05 \ +1.1525250e+05 \ +1.1478270e+05 \ +1.0994340e+05 \ +1.1017590e+05 \ +1.1643270e+05 \ +1.1077230e+05 \ +1.1474090e+05 \ +1.1438000e+05 \ +1.1785800e+05 \ +1.1438240e+05 \ +1.1230760e+05 \ +1.1230760e+05 \ +1.1340310e+05 \ +1.1387770e+05 \ +1.1632840e+05 \ +1.1449270e+05 \ +1.1701930e+05 \ +1.1656250e+05 \ +1.1098400e+05 \ +1.1129060e+05 \ +1.1746850e+05 \ +1.0997320e+05 \ +1.1356760e+05 \ +1.1765930e+05 \ +1.1504840e+05 \ +1.1837050e+05 \ +1.1178380e+05 \ +1.1299580e+05 \ +1.1508240e+05 \ +1.1338510e+05 \ +1.1376640e+05 \ +1.1052790e+05 \ +1.1670330e+05 \ +1.1311230e+05 \ +1.1365580e+05 \ +1.1444050e+05 \ +1.1232760e+05 \ +1.1251960e+05 \ +1.1807410e+05 \ +1.1838240e+05 \ +1.1155340e+05 \ +1.1552720e+05 \ +1.1449270e+05 \ +1.1277980e+05 \ +1.1673760e+05 \ +1.1224830e+05 \ +1.1581150e+05 \ +1.1517690e+05 \ +1.1321710e+05 \ +1.1604260e+05 \ +1.1086790e+05 \ +1.1641940e+05 \ +1.1378650e+05 \ +1.1403160e+05 \ +1.1786420e+05 \ +1.0863220e+05 \ +1.1509160e+05 \ +1.1620620e+05 \ +1.0982220e+05 \ +1.0902740e+05 \ +1.1087040e+05 \ +1.1021550e+05 \ +1.1327230e+05 \ +1.1327230e+05 \ +1.1281050e+05 \ +1.1586750e+05 \ 
+1.1290360e+05 \ +1.1421740e+05 \ +1.1169860e+05 \ +1.1175380e+05 \ +1.1501060e+05 \ +1.1546110e+05 \ +1.1490530e+05 \ +1.1442280e+05 \ +1.1001640e+05 \ +1.1065880e+05 \ +1.1337980e+05 \ +1.1320960e+05 \ +1.1680190e+05 \ +1.1283980e+05 \ +1.1480690e+05 \ +1.1292130e+05 \ +1.1063620e+05 \ +1.1268010e+05 \ +1.0969650e+05 \ +1.0878680e+05 \ +1.0983590e+05 \ +# 1.1938150e+05 \ +1.1230700e+05 \ +1.1608290e+05 \ +1.1121100e+05 \ +1.1302670e+05 \ +1.0973620e+05 \ +1.0999550e+05 \ +1.1285310e+05 \ +1.1222700e+05 \ +# 1.1666950e+05 \ +1.1387310e+05 \ +1.1448740e+05 \ +# 1.1609790e+05 \ +1.1169400e+05 \ +1.1051720e+05 \ +# 1.1650630e+05 \ +1.0822710e+05 \ +1.1352280e+05 \ +1.1426090e+05 \ +# 1.1619040e+05 \ +1.1107040e+05 \ +1.1172390e+05 \ +1.1078840e+05 \ +1.1277040e+05 \ +1.0859650e+05 \ +1.0828430e+05 \ +1.0758030e+05 \ +1.1079780e+05 \ +1.0675920e+05 \ +1.0620650e+05 \ +1.0758200e+05 \ +1.0733220e+05 \ +1.0653950e+05 \ +1.0745060e+05 \ +# 1.0514830e+05 \ +# 1.0504560e+05 \ +# 1.0237260e+05 \ +# 1.0290360e+05 \ +1.0599090e+05 \ +# 1.0392340e+05 \ +# 1.0398660e+05 \ +# 1.0518230e+05 \ +# 1.0568330e+05 \ +# 1.0257700e+05 \ +# 1.0530780e+05 \ +# 1.0530780e+05 \ +1.0673880e+05 \ +1.0673880e+05 \ +# 1.0054990e+05 \ +# 1.0054990e+05 \ +# 1.0043940e+05 \ +# 1.0327160e+05 \ +# 1.0241080e+05 \ +1.0757810e+05 \ +# 1.0594600e+05 \ +# 1.0554300e+05 \ +# 1.0586250e+05 \ +# 1.0508550e+05 \ +# 1.0916430e+05 \ +1.1185510e+05 \ +1.2209590e+05 \ +1.2132380e+05 \ +1.2608250e+05 \ +1.2870370e+05 \ +1.2540630e+05 \ +1.2679520e+05 \ +1.2818660e+05 \ +1.2747960e+05 \ +1.2868980e+05 \ +1.3092340e+05 \ +# 1.3163610e+05 \ +1.2822290e+05 \ +1.2748640e+05 \ +1.3092120e+05 \ +1.2934190e+05 \ +1.2938330e+05 \ +1.2811170e+05 \ +1.2871140e+05 \ +1.2245050e+05 \ +1.2649040e+05 \ +1.2605460e+05 \ +1.3117320e+05 \ +1.3124120e+05 \ +1.2954730e+05 \ +1.3141500e+05 \ +1.2800200e+05 \ +1.2896810e+05 \ +# 1.3346090e+05 \ +1.2890590e+05 \ +1.2456190e+05 \ +1.2808970e+05 \ +1.3037580e+05 \ +1.2883940e+05 \ 
+1.3059430e+05 \ +1.2588180e+05 \ +1.2816560e+05 \ +1.3033430e+05 \ +1.2655480e+05 \ +1.2575330e+05 \ +1.2802460e+05 \ +1.2678440e+05 \ +1.2901260e+05 \ +1.2996230e+05 \ +1.2682790e+05 \ +1.2715320e+05 \ +1.2744330e+05 \ +1.2854230e+05 \ +1.2910340e+05 \ +1.2855810e+05 \ +1.2715320e+05 \ +1.2900770e+05 \ +1.2734270e+05 \ +1.2418820e+05 \ +1.3016740e+05 \ +1.2635860e+05 \ +1.2687660e+05 \ +1.2843270e+05 \ +1.2843270e+05 \ +1.3085780e+05 \ +1.2661870e+05 \ +1.2661870e+05 \ +1.2780610e+05 \ +1.3029260e+05 \ +1.2884010e+05 \ +1.2398900e+05 \ +1.2467440e+05 \ +1.2823000e+05 \ +1.2831840e+05 \ +1.2829600e+05 \ +1.2390490e+05 \ +1.2784350e+05 \ +1.2425190e+05 \ +1.2618470e+05 \ +1.2956240e+05 \ +1.3033000e+05 \ +# 1.3346450e+05 \ +1.2755270e+05 \ +1.2725180e+05 \ +1.3273170e+05 \ +1.3096870e+05 \ +1.2989110e+05 \ +1.2903960e+05 \ +1.2536640e+05 \ +1.2758890e+05 \ +1.3288440e+05 \ +1.2806840e+05 \ +1.2701140e+05 \ +1.2798040e+05 \ +1.2827360e+05 \ +1.2791560e+05 \ +1.2776290e+05 \ +1.2442150e+05 \ +1.3103440e+05 \ +1.3222830e+05 \ +1.2801950e+05 \ +1.2985330e+05 \ +1.2783500e+05 \ +1.2651110e+05 \ +1.2806870e+05 \ +1.2829530e+05 \ +1.3042170e+05 \ +1.2836120e+05 \ +1.2347040e+05 \ +1.2972400e+05 \ +1.2972400e+05 \ +1.2808970e+05 \ +1.2936260e+05 \ +1.2853580e+05 \ +1.2754430e+05 \ +1.2730550e+05 \ +1.2995510e+05 \ +1.2588070e+05 \ +1.2333570e+05 \ +1.2512200e+05 \ +1.2860080e+05 \ +# 1.2257500e+05 \ +1.2781750e+05 \ +1.2551440e+05 \ +1.2536450e+05 \ +1.2431440e+05 \ +1.2691520e+05 \ +1.2923280e+05 \ +1.2657560e+05 \ +# 1.3309500e+05 \ +# 1.2235460e+05 \ +1.2817730e+05 \ +1.3119070e+05 \ +1.2824270e+05 \ +1.2471700e+05 \ +1.3177720e+05 \ +1.2784970e+05 \ +1.3087330e+05 \ +1.3125530e+05 \ +1.2426900e+05 \ +1.3102650e+05 \ +1.2685160e+05 \ +1.2740010e+05 \ +1.2622760e+05 \ +1.2642370e+05 \ +1.2657620e+05 \ +# 1.1938650e+05 \ +# 1.1378500e+05 \ +# 9.0702050e+04 \ +# 7.7599840e+04 \ +# 7.0363290e+04 \ +# 6.8337190e+04 \ +# 6.9196950e+04 \ +# 8.4798660e+04 \ +# 8.4798660e+04 \ 
+# 9.9172590e+04 \ +# 1.1636820e+05 \ +1.2532930e+05 \ +1.2532930e+05 \ +1.2650430e+05 \ +1.2926140e+05 \ +1.2839720e+05 \ +1.2967410e+05 \ +1.2758140e+05 \ +1.2526090e+05 \ +1.2989840e+05 \ +1.2789390e+05 \ +1.2877510e+05 \ +1.2813370e+05 \ +# 1.3544590e+05 \ +1.2960480e+05 \ +1.2715290e+05 \ +1.3099060e+05 \ +1.2750120e+05 \ +1.2426410e+05 \ +1.2885810e+05 \ +1.2547620e+05 \ +1.3012930e+05 \ +1.2530800e+05 \ +1.3119290e+05 \ +1.2400660e+05 \ +# 1.3237820e+05 \ +1.2454530e+05 \ +1.2491510e+05 \ +1.2685660e+05 \ +1.2578240e+05 \ +1.3044830e+05 \ +1.3030920e+05 \ +1.2448550e+05 \ +1.2627540e+05 \ +1.2855810e+05 \ +1.3216240e+05 \ +1.2798040e+05 \ +1.2747970e+05 \ +1.2594600e+05 \ +1.2732760e+05 \ +1.3202010e+05 \ +1.2741500e+05 \ +1.3174240e+05 \ +1.2549890e+05 \ +1.2903680e+05 \ +1.3076880e+05 \ +1.2579280e+05 \ +1.2503200e+05 \ +1.2521530e+05 \ +1.2912420e+05 \ +1.3189900e+05 \ +1.2709540e+05 \ +1.2803450e+05 \ +1.2787220e+05 \ +1.2800200e+05 \ +1.2952440e+05 \ +1.2537340e+05 \ +1.3257910e+05 \ +1.2694640e+05 \ +1.2680580e+05 \ +1.2659980e+05 \ +1.2435050e+05 \ +1.2888430e+05 \ +1.2613940e+05 \ +1.3018350e+05 \ +1.2455810e+05 \ +1.2455810e+05 \ +1.2812690e+05 \ +1.2812690e+05 \ +1.2785050e+05 \ +1.2522120e+05 \ +1.3271280e+05 \ +1.2812880e+05 \ +1.2851410e+05 \ +1.2918940e+05 \ +# 1.3498410e+05 \ +1.2793690e+05 \ +1.2696870e+05 \ +1.2488900e+05 \ +1.2743630e+05 \ +1.3304060e+05 \ +1.2694720e+05 \ +1.3109090e+05 \ +1.3013510e+05 \ +1.3085750e+05 \ +1.3015840e+05 \ +1.3138600e+05 \ +1.2866670e+05 \ +1.3204500e+05 \ +1.2873040e+05 \ +# 1.3415010e+05 \ +1.2877590e+05 \ +1.3238240e+05 \ +1.3033430e+05 \ +1.2616410e+05 \ +1.2855750e+05 \ +1.3233550e+05 \ +1.2416260e+05 \ +1.3202710e+05 \ +1.2969320e+05 \ +1.2769690e+05 \ +1.3072510e+05 \ +1.2871780e+05 \ +1.2769380e+05 \ +1.3126010e+05 \ +1.3018020e+05 \ +1.2879900e+05 \ +1.2915930e+05 \ +1.2868840e+05 \ +1.2726280e+05 \ +1.3055070e+05 \ +1.2771990e+05 \ +1.2732740e+05 \ +1.2474800e+05 \ +1.2476760e+05 \ +1.2635720e+05 
\ +1.2875410e+05 \ +1.2853580e+05 \ +1.2945100e+05 \ +1.2789390e+05 \ +1.2972270e+05 \ +1.2653260e+05 \ +1.2908030e+05 \ +1.2857980e+05 \ +1.2912510e+05 \ +1.3125780e+05 \ +1.2580030e+05 \ +1.2666490e+05 \ +1.2695770e+05 \ +1.3132580e+05 \ +1.2756740e+05 \ +1.2853640e+05 \ +1.2594500e+05 \ +1.2846990e+05 \ +1.2666310e+05 \ +1.2666310e+05 \ +1.2663270e+05 \ +1.2903590e+05 \ +1.2716380e+05 \ +1.2761050e+05 \ +1.3108350e+05 \ +1.2629200e+05 \ +1.2525950e+05 \ +1.2551900e+05 \ +1.2448400e+05 \ +1.2959920e+05 \ +1.3068970e+05 \ +1.2921010e+05 \ +1.2564380e+05 \ +1.2438650e+05 \ +1.2338210e+05 \ +1.2495780e+05 \ +1.2334200e+05 \ +1.2473660e+05 \ +1.2629500e+05 \ +1.2569680e+05 \ +1.2637630e+05 \ +1.2232540e+05 \ +1.2685970e+05 \ +1.2711010e+05 \ +1.2444620e+05 \ +1.2450260e+05 \ +1.2721950e+05 \ +1.2459170e+05 \ +# 1.2104360e+05 \ +1.2666360e+05 \ +1.2782340e+05 \ +1.3034360e+05 \ +1.2374550e+05 \ +1.2415460e+05 \ +1.2640230e+05 \ +1.2437180e+05 \ +1.2476110e+05 \ +1.2581850e+05 \ +1.2644450e+05 \ +1.2735050e+05 \ +1.2849100e+05 \ +1.2566530e+05 \ +1.2688120e+05 \ +1.2965720e+05 \ +1.2903680e+05 \ +1.2564260e+05 \ +1.2780620e+05 \ +1.2829530e+05 \ +1.2433240e+05 \ +1.2463930e+05 \ +1.2767070e+05 \ +1.2360430e+05 \ +1.2498240e+05 \ +1.2228480e+05 \ +1.2408890e+05 \ +1.2408890e+05 \ +1.2872430e+05 \ +1.2619840e+05 \ +1.2503840e+05 \ +1.2675220e+05 \ +1.2166050e+05 \ +1.2966570e+05 \ +1.2668060e+05 \ +1.2782780e+05 \ +1.2382840e+05 \ +1.2435760e+05 \ +1.2631810e+05 \ +1.2151710e+05 \ +# 1.2055060e+05 \ +1.2093750e+05 \ +# 1.0847690e+05 \ +# 9.3351330e+04 \ +# 8.6079190e+04 \ +# 9.3725880e+04 \ +# 1.1127180e+05 \ +# 1.1988900e+05 \ +1.2711170e+05 \ +1.2289390e+05 \ +1.2302360e+05 \ +1.2158640e+05 \ +1.2978660e+05 \ +1.2569300e+05 \ +1.2681120e+05 \ +1.2392850e+05 \ +1.2724080e+05 \ +1.2726210e+05 \ +1.2696800e+05 \ +1.2643250e+05 \ +1.2661700e+05 \ +1.2758890e+05 \ +1.2573620e+05 \ +1.2541070e+05 \ +1.2517110e+05 \ +1.2517110e+05 \ +1.2767550e+05 \ +1.2493320e+05 \ 
+1.2370950e+05 \ +1.3063610e+05 \ +1.2552030e+05 \ +1.2753810e+05 \ +1.2887220e+05 \ +1.2200540e+05 \ +1.2700660e+05 \ +1.2614910e+05 \ +1.2599700e+05 \ +1.2365680e+05 \ +1.2222120e+05 \ +1.2670620e+05 \ +1.2319160e+05 \ +1.3046900e+05 \ +1.2795930e+05 \ +1.2949690e+05 \ +1.2787180e+05 \ +1.2752960e+05 \ +1.3092330e+05 \ +1.3022110e+05 \ +1.2488750e+05 \ +1.2565130e+05 \ +1.2272310e+05 \ +1.2813410e+05 \ +1.2532090e+05 \ +# 1.2158640e+05 \ +1.2690220e+05 \ +1.2536510e+05 \ +1.2819920e+05 \ +1.2565020e+05 \ +1.2849310e+05 \ +1.2681490e+05 \ +1.2975870e+05 \ +1.2384970e+05 \ +1.2677030e+05 \ +1.2655290e+05 \ +1.2864630e+05 \ +1.2481150e+05 \ +1.2435560e+05 \ +1.2614910e+05 \ +1.2759600e+05 \ +1.2920910e+05 \ +1.2518460e+05 \ +1.2518970e+05 \ +1.2580060e+05 \ +1.2895110e+05 \ +1.2562640e+05 \ +1.2602010e+05 \ +1.2715930e+05 \ +1.2644330e+05 \ +1.2715290e+05 \ +1.2705380e+05 \ +1.2666310e+05 \ +1.2742460e+05 \ +1.2650930e+05 \ +1.2778480e+05 \ +1.2986930e+05 \ +1.2884170e+05 \ +1.2560730e+05 \ +1.2712070e+05 \ +1.2877660e+05 \ +1.2720700e+05 \ +1.2600200e+05 \ +1.2383290e+05 \ +1.2383290e+05 \ +1.2489390e+05 \ +# 1.2114240e+05 \ +# 1.2142780e+05 \ +# 1.2142780e+05 \ +1.2433430e+05 \ +1.2535110e+05 \ +1.2945680e+05 \ +1.2584160e+05 \ +1.2762220e+05 \ +1.3161410e+05 \ +1.3152720e+05 \ +# 1.3611750e+05 \ +1.3352810e+05 \ +1.3628440e+05 \ +1.3161280e+05 \ +1.3412450e+05 \ +1.3325940e+05 \ +1.3529780e+05 \ +1.2923490e+05 \ +1.3639160e+05 \ +1.3128200e+05 \ +1.3516650e+05 \ +1.3375400e+05 \ +1.2897380e+05 \ +1.3214020e+05 \ +1.3372930e+05 \ +1.3384590e+05 \ +1.3590300e+05 \ +1.3566730e+05 \ +1.3710470e+05 \ +1.3305060e+05 \ +1.3328870e+05 \ +# 1.4040850e+05 \ +1.3334450e+05 \ +1.3569070e+05 \ +1.3335570e+05 \ +1.3451280e+05 \ +1.3866460e+05 \ +1.3620300e+05 \ +1.3290950e+05 \ +1.3360780e+05 \ +1.3598900e+05 \ +1.3514900e+05 \ +1.3790530e+05 \ +1.3370330e+05 \ +1.3612360e+05 \ +1.3859160e+05 \ +1.3568110e+05 \ +1.3168530e+05 \ +1.3526230e+05 \ +1.3382370e+05 \ +1.3068910e+05 
\ +1.3944770e+05 \ +1.3388640e+05 \ +1.3586340e+05 \ +1.3253150e+05 \ +1.3093190e+05 \ +1.3774830e+05 \ +1.3505290e+05 \ +1.3687340e+05 \ +1.3430720e+05 \ +1.3355110e+05 \ +1.3788270e+05 \ +1.3644870e+05 \ +1.3346390e+05 \ +1.3583050e+05 \ +1.3315020e+05 \ +1.3422510e+05 \ +1.3615150e+05 \ +1.3557410e+05 \ +1.3754530e+05 \ +1.3174870e+05 \ +1.3618700e+05 \ +# 1.2822240e+05 \ +1.3489700e+05 \ +1.3489700e+05 \ +1.3877250e+05 \ +# 1.2871930e+05 \ +1.3510420e+05 \ +1.3608410e+05 \ +1.3233160e+05 \ +1.3442000e+05 \ +# 1.2889280e+05 \ +1.3846590e+05 \ +1.3240020e+05 \ +1.3541560e+05 \ +1.3590830e+05 \ +1.3847970e+05 \ +1.3797980e+05 \ +1.3765680e+05 \ +1.3714520e+05 \ +1.3574460e+05 \ +1.3557410e+05 \ +1.3552260e+05 \ +1.3366660e+05 \ +1.4038570e+05 \ +1.3444230e+05 \ +1.3625040e+05 \ +1.3270990e+05 \ +1.3483780e+05 \ +1.3592010e+05 \ +1.3485810e+05 \ +1.3541410e+05 \ +1.3364430e+05 \ +1.3469720e+05 \ +1.3771440e+05 \ +1.3480640e+05 \ +1.3494630e+05 \ +1.3357600e+05 \ +1.3514900e+05 \ +1.3645260e+05 \ +1.3280770e+05 \ +1.3781680e+05 \ +1.3538830e+05 \ +1.3568110e+05 \ +1.3470240e+05 \ +1.3574460e+05 \ +1.3315060e+05 \ +1.3517210e+05 \ +1.3521200e+05 \ +1.3179080e+05 \ +1.3790530e+05 \ +1.3778120e+05 \ +1.3601670e+05 \ +1.3933510e+05 \ +1.3590830e+05 \ +1.3181900e+05 \ +1.3583050e+05 \ +1.3836800e+05 \ +1.3827410e+05 \ +1.3500860e+05 \ +1.3379740e+05 \ +1.3329600e+05 \ +1.3235610e+05 \ +1.3933510e+05 \ +1.3721290e+05 \ +1.3627140e+05 \ +1.3235950e+05 \ +1.3235950e+05 \ +1.3707830e+05 \ +1.3536040e+05 \ +1.3737480e+05 \ +1.3745570e+05 \ +1.3429350e+05 \ +1.3713320e+05 \ +1.3380510e+05 \ +1.3438120e+05 \ +1.3566370e+05 \ +1.3780070e+05 \ +1.3815580e+05 \ +1.3123800e+05 \ +1.3424320e+05 \ +# 1.3903670e+05 \ +1.3390860e+05 \ +1.3617620e+05 \ +# 1.3885780e+05 \ +1.3364260e+05 \ +1.3629530e+05 \ +1.3800950e+05 \ +1.3320010e+05 \ +1.3337110e+05 \ +1.3667100e+05 \ +1.3565500e+05 \ +1.3607330e+05 \ +1.3529730e+05 \ +# 1.3771700e+05 \ +1.3064140e+05 \ +1.2545610e+05 \ 
+1.2065880e+05 \ +1.2065880e+05 \ +# 1.1051840e+05 \ +# 1.1060850e+05 \ +# 1.1283270e+05 \ +# 1.0823010e+05 \ +1.1238810e+05 \ +# 1.1006930e+05 \ +# 1.0918910e+05 \ +# 1.1061920e+05 \ +1.1422910e+05 \ +1.1360430e+05 \ +1.1796200e+05 \ +# 1.1032310e+05 \ +1.1230670e+05 \ +1.1370680e+05 \ +1.1692870e+05 \ +1.1311060e+05 \ +1.1168490e+05 \ +1.1469000e+05 \ +1.1497410e+05 \ +1.1663660e+05 \ +1.1200550e+05 \ +# 1.1057960e+05 \ +1.1422380e+05 \ +1.1628610e+05 \ +1.1353970e+05 \ +# 1.0972470e+05 \ +1.1280920e+05 \ +1.1612800e+05 \ +1.1386310e+05 \ +1.1480910e+05 \ +1.1126220e+05 \ +1.1529870e+05 \ +# 1.2197110e+05 \ +1.1370820e+05 \ +1.1624550e+05 \ +1.1835330e+05 \ +1.1703980e+05 \ +1.1678470e+05 \ +1.1745180e+05 \ +1.1157370e+05 \ +# 1.0827400e+05 \ +1.1157330e+05 \ +1.1246700e+05 \ +1.1469370e+05 \ +1.1661500e+05 \ +1.1563750e+05 \ +1.1474230e+05 \ +1.1575550e+05 \ +1.1893930e+05 \ +1.1619760e+05 \ +# 1.1989830e+05 \ +1.1555510e+05 \ +1.1467210e+05 \ +1.1678540e+05 \ +1.1432660e+05 \ +1.1565290e+05 \ +1.1665480e+05 \ +1.1212620e+05 \ +1.1652550e+05 \ +1.1869030e+05 \ +1.1601990e+05 \ +1.1269970e+05 \ +1.1544150e+05 \ +1.1515240e+05 \ +1.1319210e+05 \ +1.1421450e+05 \ +1.1428640e+05 \ +1.1023730e+05 \ +1.1526140e+05 \ +1.1980360e+05 \ +1.1265840e+05 \ +1.1858170e+05 \ +1.1308910e+05 \ +1.1308910e+05 \ +1.1636870e+05 \ +1.1501770e+05 \ +1.1289170e+05 \ +1.1289170e+05 \ +1.1887760e+05 \ +# 1.0939530e+05 \ +1.1562740e+05 \ +1.1573760e+05 \ +1.1509160e+05 \ +1.1392060e+05 \ +1.1452120e+05 \ +1.1097150e+05 \ +1.1452740e+05 \ +1.1317210e+05 \ +1.1530310e+05 \ +1.1824340e+05 \ +1.1533910e+05 \ +1.1401650e+05 \ +1.1677200e+05 \ +1.1594410e+05 \ +1.1545650e+05 \ +1.1442010e+05 \ +1.1289050e+05 \ +1.1465040e+05 \ +1.1306240e+05 \ +1.1315960e+05 \ +1.1396820e+05 \ +1.1307440e+05 \ +1.1448410e+05 \ +1.1398830e+05 \ +1.1619210e+05 \ +1.1206530e+05 \ +1.1500960e+05 \ +1.1701250e+05 \ +1.1475680e+05 \ +1.1099260e+05 \ +1.1491950e+05 \ +1.1704650e+05 \ +1.1684830e+05 \ +1.1272810e+05 \ 
+1.1131230e+05 \ +# 1.2049040e+05 \ +1.1578330e+05 \ +1.1490770e+05 \ +1.1694320e+05 \ +1.1701250e+05 \ +1.1755020e+05 \ +# 1.0620790e+05 \ +1.1419770e+05 \ +1.1544860e+05 \ +1.1463250e+05 \ +1.1712940e+05 \ +1.1433830e+05 \ +1.1665700e+05 \ +1.1147380e+05 \ +1.1410000e+05 \ +1.1524560e+05 \ +1.1539970e+05 \ +1.1392210e+05 \ +1.1290380e+05 \ +1.1436510e+05 \ +1.1600490e+05 \ +1.1198550e+05 \ +1.1907310e+05 \ +1.1418440e+05 \ +1.1645090e+05 \ +1.1793380e+05 \ +1.1878150e+05 \ +1.1246700e+05 \ +1.1361550e+05 \ +1.1507020e+05 \ +1.1610530e+05 \ +1.1651230e+05 \ +1.1355970e+05 \ +1.1604680e+05 \ +1.1811920e+05 \ +1.1548670e+05 \ +1.1568570e+05 \ +1.1518850e+05 \ +1.1581100e+05 \ +1.1356290e+05 \ +1.1529080e+05 \ +1.1529080e+05 \ +1.1700570e+05 \ +1.1752220e+05 \ +1.1393810e+05 \ +1.1738540e+05 \ +1.1695300e+05 \ +1.1794790e+05 \ +1.1524910e+05 \ +1.1563990e+05 \ +1.2041380e+05 \ +1.1631370e+05 \ +1.1360430e+05 \ +1.1404530e+05 \ +1.1345360e+05 \ +1.1530670e+05 \ +1.1684130e+05 \ +# 1.1159320e+05 \ +1.1487530e+05 \ +1.1539970e+05 \ +1.1639490e+05 \ +1.1575550e+05 \ +1.1527760e+05 \ +1.1726300e+05 \ +1.2270910e+05 \ +1.2325150e+05 \ +# 1.2960910e+05 \ +1.2472960e+05 \ +1.2754220e+05 \ +1.2616790e+05 \ +1.2902200e+05 \ +1.2878280e+05 \ +1.3026980e+05 \ +# 1.3300720e+05 \ +1.2770870e+05 \ +1.2687130e+05 \ +1.3136780e+05 \ +1.2943370e+05 \ +1.2633200e+05 \ +1.2902200e+05 \ +1.3223370e+05 \ +1.2752030e+05 \ +1.3190390e+05 \ +1.2800780e+05 \ +1.2952120e+05 \ +1.3151290e+05 \ +1.3096680e+05 \ +1.2983430e+05 \ +1.2669670e+05 \ +1.3116250e+05 \ +1.3248660e+05 \ +1.3188310e+05 \ +1.2827300e+05 \ +1.2934760e+05 \ +1.2575330e+05 \ +1.3079100e+05 \ +1.2580110e+05 \ +1.2759940e+05 \ +1.3013940e+05 \ +1.3011910e+05 \ +1.2689590e+05 \ +1.3192170e+05 \ +1.2612520e+05 \ +1.2612520e+05 \ +# 1.2233680e+05 \ +1.2873960e+05 \ +1.2826550e+05 \ +1.3046840e+05 \ +1.2994410e+05 \ +1.2829060e+05 \ +1.2676540e+05 \ +1.2764410e+05 \ +1.2665780e+05 \ +1.2840390e+05 \ +1.2945540e+05 \ +1.3138750e+05 
\ +1.2876120e+05 \ +1.3300410e+05 \ +1.3000910e+05 \ +1.2846860e+05 \ +1.2786050e+05 \ +1.2842580e+05 \ +1.3042180e+05 \ +1.2696020e+05 \ +1.2919120e+05 \ +1.3067160e+05 \ +1.3196920e+05 \ +1.2665700e+05 \ +1.2682960e+05 \ +1.2820740e+05 \ +1.3065910e+05 \ +1.2981260e+05 \ +1.3107710e+05 \ +1.2858680e+05 \ +1.3311710e+05 \ +1.2904360e+05 \ +1.2998620e+05 \ +1.2494160e+05 \ +1.3335540e+05 \ +1.2954190e+05 \ +1.2982650e+05 \ +1.3308280e+05 \ +1.2954400e+05 \ +1.2783900e+05 \ +1.2783900e+05 \ +1.2876770e+05 \ +1.2676470e+05 \ +1.2714880e+05 \ +1.2807080e+05 \ +1.2902270e+05 \ +1.2615240e+05 \ +1.2693990e+05 \ +1.2586270e+05 \ +1.3440860e+05 \ +1.3181940e+05 \ +1.3074750e+05 \ +1.2822940e+05 \ +# 1.2350510e+05 \ +1.3156140e+05 \ +1.3109910e+05 \ +1.3131470e+05 \ +1.3208830e+05 \ +1.2868540e+05 \ +1.2812130e+05 \ +1.3021190e+05 \ +1.3024520e+05 \ +1.2943520e+05 \ +1.3081280e+05 \ +1.2809280e+05 \ +1.3002950e+05 \ +1.2661350e+05 \ +1.3406070e+05 \ +1.2992120e+05 \ +1.2814300e+05 \ +1.3197080e+05 \ +1.3173100e+05 \ +1.3423000e+05 \ +1.3063730e+05 \ +1.3202260e+05 \ +1.2577130e+05 \ +1.3033340e+05 \ +1.2504340e+05 \ +1.3180230e+05 \ +1.3281890e+05 \ +1.2895710e+05 \ +# 1.2440060e+05 \ +1.3283780e+05 \ +1.3031620e+05 \ +# 1.3563530e+05 \ +1.2745250e+05 \ +1.2748490e+05 \ +1.3099450e+05 \ +1.3057710e+05 \ +1.2969060e+05 \ +1.3162610e+05 \ +1.3092130e+05 \ +1.3296490e+05 \ +1.3055200e+05 \ +1.2908680e+05 \ +# 1.2472580e+05 \ +1.2897870e+05 \ +1.3172060e+05 \ +1.2698420e+05 \ +1.2958620e+05 \ +1.3246460e+05 \ +1.3230240e+05 \ +1.3022490e+05 \ +1.2730390e+05 \ +1.2864540e+05 \ +1.3127910e+05 \ +1.3142550e+05 \ +1.2543200e+05 \ +1.2861530e+05 \ +1.2872410e+05 \ +1.3031170e+05 \ +1.2633670e+05 \ +1.2963100e+05 \ +1.3040330e+05 \ +1.3068430e+05 \ +1.3062240e+05 \ +1.3062240e+05 \ +1.2705900e+05 \ +1.2705900e+05 \ +1.3265650e+05 \ +1.3265650e+05 \ +1.2971660e+05 \ +# 1.2200510e+05 \ +1.2732570e+05 \ +1.3253040e+05 \ +1.3307000e+05 \ +1.2919560e+05 \ +1.2875380e+05 \ +1.2679350e+05 
\ +1.2918740e+05 \ +1.2927770e+05 \ +1.3153730e+05 \ +# 1.3228600e+05 \ +1.2702590e+05 \ +1.2979090e+05 \ +1.2820760e+05 \ +# 1.3252340e+05 \ +1.2871750e+05 \ +# 1.3050700e+05 \ +1.2682900e+05 \ +1.2468190e+05 \ +1.2413890e+05 \ +1.2017070e+05 \ +1.1846720e+05 \ +1.2021370e+05 \ +1.2212380e+05 \ +1.2524630e+05 \ +1.1909310e+05 \ +1.2170180e+05 \ +1.2170820e+05 \ +# 1.1496690e+05 \ +1.1678180e+05 \ +# 1.1585620e+05 \ +1.2125970e+05 \ +1.1882120e+05 \ +1.2130550e+05 \ +# 1.1575050e+05 \ +1.1800560e+05 \ +# 1.1605400e+05 \ +1.1987790e+05 \ +1.1820760e+05 \ +# 1.1553010e+05 \ +1.1999900e+05 \ +1.1936580e+05 \ +1.2187950e+05 \ +1.2113060e+05 \ +1.1983630e+05 \ +1.1856550e+05 \ +1.2045320e+05 \ +1.1618210e+05 \ +1.1925520e+05 \ +1.2079350e+05 \ +1.2300780e+05 \ +1.1812210e+05 \ +1.1939210e+05 \ +1.1944850e+05 \ +1.2086670e+05 \ +1.1943300e+05 \ +1.2063790e+05 \ +1.2010480e+05 \ +1.1731550e+05 \ +1.1717530e+05 \ +1.2109370e+05 \ +1.1976270e+05 \ +1.2175940e+05 \ +1.1964310e+05 \ +1.2186340e+05 \ +# 1.2471410e+05 \ +1.1955870e+05 \ +1.2005460e+05 \ +1.2013380e+05 \ +1.1850300e+05 \ +1.2291410e+05 \ +1.2461430e+05 \ +1.1952270e+05 \ +1.1952270e+05 \ +1.2286630e+05 \ +1.2219170e+05 \ +1.1703030e+05 \ +1.1703030e+05 \ +1.1823990e+05 \ +1.2008750e+05 \ +1.2008750e+05 \ +1.2059170e+05 \ +1.1833630e+05 \ +1.2109700e+05 \ +1.1865520e+05 \ +# 1.1376180e+05 \ +1.1904290e+05 \ +1.2044480e+05 \ +1.1960020e+05 \ +# 1.2509190e+05 \ +1.2228660e+05 \ +1.2274910e+05 \ +1.2088330e+05 \ +1.1853270e+05 \ +1.1961590e+05 \ +1.1815730e+05 \ +1.1952780e+05 \ +1.2314930e+05 \ +1.1919330e+05 \ +1.2130550e+05 \ +1.1755050e+05 \ +1.1685050e+05 \ +# 1.1352230e+05 \ +# 1.2566790e+05 \ +1.2086170e+05 \ +1.1918270e+05 \ +1.2074740e+05 \ +1.1911180e+05 \ +1.2235700e+05 \ +1.1963440e+05 \ +1.2191260e+05 \ +1.2073880e+05 \ +1.1930200e+05 \ +1.1667790e+05 \ +1.2015090e+05 \ +1.1886400e+05 \ +1.2041160e+05 \ +1.2118280e+05 \ +1.2149980e+05 \ +1.2364930e+05 \ +1.2146270e+05 \ +1.1903090e+05 \ +1.1783940e+05 \ 
+1.1962670e+05 \ +# 1.1394640e+05 \ +1.1994160e+05 \ +1.2077320e+05 \ +1.1770030e+05 \ +1.1821340e+05 \ +1.2184980e+05 \ +1.2184980e+05 \ +1.2456190e+05 \ +1.1829420e+05 \ +1.2218480e+05 \ +1.2285100e+05 \ +# 1.1503710e+05 \ +1.2126370e+05 \ +1.2265420e+05 \ +1.1948630e+05 \ +1.1957440e+05 \ +1.2318620e+05 \ +1.1894130e+05 \ +1.2037460e+05 \ +1.2233350e+05 \ +1.2120510e+05 \ +1.1805890e+05 \ +1.2242420e+05 \ +1.2231920e+05 \ +# 1.2589900e+05 \ +1.2267840e+05 \ +1.2136920e+05 \ +1.2202600e+05 \ +1.2203500e+05 \ +1.2096680e+05 \ +1.1740520e+05 \ +1.1598150e+05 \ +1.1870270e+05 \ +1.1572450e+05 \ +1.2060050e+05 \ +1.2362670e+05 \ +1.1935010e+05 \ +1.1806150e+05 \ +1.2153860e+05 \ +1.2018680e+05 \ +1.1889990e+05 \ +1.2032200e+05 \ +1.1844590e+05 \ +1.2044870e+05 \ +1.2229670e+05 \ +1.1672710e+05 \ +1.2124560e+05 \ +1.1993680e+05 \ +1.2409900e+05 \ +1.1953340e+05 \ +1.1872740e+05 \ +1.1947070e+05 \ +1.1924490e+05 \ +1.1869710e+05 \ +1.2010350e+05 \ +1.2284830e+05 \ +1.2182880e+05 \ +1.1820450e+05 \ +1.1820450e+05 \ +1.1957900e+05 \ +1.1922940e+05 \ +1.1922940e+05 \ +1.2100760e+05 \ +1.1649890e+05 \ +1.2211690e+05 \ +1.2085810e+05 \ +1.1746040e+05 \ +1.1817640e+05 \ +1.2290090e+05 \ +1.1720740e+05 \ +1.2025060e+05 \ +1.2017640e+05 \ +1.2081500e+05 \ +1.2034280e+05 \ +1.1769320e+05 \ +1.1583970e+05 \ +# 1.1434160e+05 \ +1.1741040e+05 \ +1.1853930e+05 \ +1.1991900e+05 \ +1.1969930e+05 \ +1.1587710e+05 \ +1.1987120e+05 \ +1.2007020e+05 \ +1.2140280e+05 \ +1.1516610e+05 \ +1.1962200e+05 \ +1.1925100e+05 \ +1.1932080e+05 \ +1.1867580e+05 \ +1.2050460e+05 \ +1.2329350e+05 \ +1.1964770e+05 \ +1.1858040e+05 \ +1.2133490e+05 \ +1.1960640e+05 \ +1.1584930e+05 \ +1.1986680e+05 \ +1.1740580e+05 \ +1.2017420e+05 \ +1.1649060e+05 \ +1.1705490e+05 \ +# 1.1315200e+05 \ +1.1862150e+05 \ +1.2051780e+05 \ +1.2216900e+05 \ +1.1790970e+05 \ +1.1902120e+05 \ +1.1997320e+05 \ +1.1792910e+05 \ +1.1686870e+05 \ +1.2077260e+05 \ +1.1704650e+05 \ +1.1642040e+05 \ +1.2248130e+05 \ +1.1743140e+05 \ 
+1.1885140e+05 \ +1.1849240e+05 \ +1.1790240e+05 \ +1.1923580e+05 \ +1.2240720e+05 \ +1.1942410e+05 \ +1.2048630e+05 \ +1.1675860e+05 \ +1.1849240e+05 \ +1.2016800e+05 \ +1.1873140e+05 \ +1.2237560e+05 \ +1.1961180e+05 \ +1.1863460e+05 \ +1.2041290e+05 \ +1.2092020e+05 \ +1.2016800e+05 \ +1.2083620e+05 \ +1.2083620e+05 \ +1.1927850e+05 \ +1.1887200e+05 \ +1.1887200e+05 \ +1.1591160e+05 \ +1.2074020e+05 \ +1.1987120e+05 \ +1.1987120e+05 \ +1.2285270e+05 \ +1.2039690e+05 \ +1.1771930e+05 \ +1.2254740e+05 \ +1.2015430e+05 \ +1.1594480e+05 \ +1.1915340e+05 \ +1.1940880e+05 \ +1.1805970e+05 \ +1.1897310e+05 \ +1.1591910e+05 \ +1.2316320e+05 \ +1.2010590e+05 \ +1.1430130e+05 \ +1.1647360e+05 \ +1.2225610e+05 \ +1.1698530e+05 \ +1.2243810e+05 \ +1.1812730e+05 \ +1.2212850e+05 \ +1.1717030e+05 \ +1.1814780e+05 \ +1.1819020e+05 \ +1.1882140e+05 \ +1.1972140e+05 \ +1.2206340e+05 \ +1.1631220e+05 \ +1.2019000e+05 \ +1.1802420e+05 \ +1.1565780e+05 \ +1.2018630e+05 \ +1.1897610e+05 \ +1.1903480e+05 \ +1.1720600e+05 \ +1.1617150e+05 \ +1.1891860e+05 \ +1.1845590e+05 \ +1.2000110e+05 \ +1.2214310e+05 \ +1.1975200e+05 \ +# 1.1346470e+05 \ +1.1620620e+05 \ +1.1830490e+05 \ +1.1808630e+05 \ +1.2040340e+05 \ +1.1557270e+05 \ +1.1942410e+05 \ +1.2000610e+05 \ +1.1913920e+05 \ +1.1626730e+05 \ +1.1678470e+05 \ +1.1866980e+05 \ +1.1614830e+05 \ +1.1907920e+05 \ +1.2200930e+05 \ +1.1555510e+05 \ +1.1874630e+05 \ +1.1761290e+05 \ +1.1633250e+05 \ +1.1569840e+05 \ +1.1811300e+05 \ +1.2042880e+05 \ +# 1.2397450e+05 \ +1.1947070e+05 \ +1.1912180e+05 \ +1.1823130e+05 \ +1.1605520e+05 \ +1.1826570e+05 \ +1.2000610e+05 \ +1.2284260e+05 \ +1.2068500e+05 \ +1.1977290e+05 \ +1.1803400e+05 \ +1.1440320e+05 \ +1.1864930e+05 \ +1.1518050e+05 \ +1.1870510e+05 \ +1.2175740e+05 \ +1.1683440e+05 \ +1.1837050e+05 \ +1.1765380e+05 \ +1.1752970e+05 \ +1.1687510e+05 \ +1.1622520e+05 \ +1.2023250e+05 \ +1.2023250e+05 \ +1.1507660e+05 \ +1.1694320e+05 \ +1.2099540e+05 \ +1.2099540e+05 \ +1.1736870e+05 \ 
+1.1872570e+05 \ +1.1832650e+05 \ +1.1895560e+05 \ +1.1637960e+05 \ +1.1874120e+05 \ +1.1763330e+05 \ +1.1717690e+05 \ +1.2124180e+05 \ +1.1673760e+05 \ +1.2220710e+05 \ +1.1715660e+05 \ +1.1767830e+05 \ +1.1819020e+05 \ +1.1594300e+05 \ +1.1691070e+05 \ +1.1592660e+05 \ +1.2243840e+05 \ +1.1727380e+05 \ +1.1768610e+05 \ +1.1790240e+05 \ +1.1792710e+05 \ +# 1.1435350e+05 \ +1.2057840e+05 \ +1.2571320e+05 \ +1.3337100e+05 \ +1.3007430e+05 \ +# 1.3735960e+05 \ +1.3362100e+05 \ +1.3786660e+05 \ +1.3532500e+05 \ +1.3462670e+05 \ +1.3338250e+05 \ +1.3795710e+05 \ +1.3795710e+05 \ +1.3521270e+05 \ +1.3619470e+05 \ +1.3397130e+05 \ +1.3641330e+05 \ +1.3722930e+05 \ +1.3050840e+05 \ +1.3721850e+05 \ +1.3576570e+05 \ +1.3612360e+05 \ +1.3599550e+05 \ +1.3556850e+05 \ +1.3648950e+05 \ +1.3294630e+05 \ +# 1.4010900e+05 \ +1.3634020e+05 \ +1.3594790e+05 \ +1.3300160e+05 \ +1.3781680e+05 \ +1.3568250e+05 \ +1.3340100e+05 \ +1.3578420e+05 \ +1.3269520e+05 \ +1.3807850e+05 \ +1.3674950e+05 \ +1.3330670e+05 \ +1.3583980e+05 \ +1.3201220e+05 \ +1.3486940e+05 \ +1.3821540e+05 \ +1.3590300e+05 \ +1.3471510e+05 \ +1.3474190e+05 \ +1.3653280e+05 \ +1.3731090e+05 \ +1.3775460e+05 \ +1.3708000e+05 \ +1.3469500e+05 \ +1.3592550e+05 \ +1.3552260e+05 \ +1.3248210e+05 \ +1.3530710e+05 \ +1.3457610e+05 \ +1.3303630e+05 \ +1.3414820e+05 \ +1.3561380e+05 \ +1.3491940e+05 \ +# 1.3878270e+05 \ +# 1.3840480e+05 \ +# 1.3728630e+05 \ +1.3036000e+05 \ +1.2644740e+05 \ +# 1.1928140e+05 \ +1.2001040e+05 \ +1.2340690e+05 \ +# 1.1719700e+05 \ +1.2237740e+05 \ +1.2299440e+05 \ +1.1927610e+05 \ +1.2231920e+05 \ +1.1862190e+05 \ +1.1841370e+05 \ +1.1742340e+05 \ +1.2431660e+05 \ +1.1873290e+05 \ +1.2126650e+05 \ +1.1835030e+05 \ +1.1709180e+05 \ +1.1962440e+05 \ +1.1962440e+05 \ +1.1671210e+05 \ +1.1985130e+05 \ +1.1985130e+05 \ +1.1627230e+05 \ +1.1963300e+05 \ +1.1800500e+05 \ +1.1800500e+05 \ +1.1999920e+05 \ +1.1692690e+05 \ +1.2321970e+05 \ +1.1909310e+05 \ +1.2008860e+05 \ +1.1601730e+05 \ 
+1.1881570e+05 \ +1.2217420e+05 \ +1.1815140e+05 \ +1.1996240e+05 \ +1.1644440e+05 \ +1.2416880e+05 \ +# 1.1491530e+05 \ +1.1890500e+05 \ +1.1950350e+05 \ +1.2210420e+05 \ +1.2164860e+05 \ +1.2231850e+05 \ +1.1812570e+05 \ +1.1830900e+05 \ +1.2049170e+05 \ +1.1869520e+05 \ +1.2187070e+05 \ +1.2076460e+05 \ +1.1930710e+05 \ +1.1783940e+05 \ +1.2090410e+05 \ +1.1900870e+05 \ +# 1.1597820e+05 \ +1.1904630e+05 \ +1.2139260e+05 \ +1.1849700e+05 \ +1.1891090e+05 \ +1.1855670e+05 \ +# 1.1506520e+05 \ +1.2168590e+05 \ +1.2581380e+05 \ +1.3034220e+05 \ +1.2648300e+05 \ +1.2783900e+05 \ +1.3018010e+05 \ +1.3096300e+05 \ +1.2715640e+05 \ +1.3026830e+05 \ +1.3044510e+05 \ +1.2949860e+05 \ +1.3048700e+05 \ +1.2954290e+05 \ +1.2798650e+05 \ +1.2926230e+05 \ +1.3242080e+05 \ +1.2945540e+05 \ +# 1.3311290e+05 \ +1.2918910e+05 \ +1.3158780e+05 \ +1.2877620e+05 \ +1.2786050e+05 \ +1.2885610e+05 \ +1.2678290e+05 \ +1.2631730e+05 \ +1.2579400e+05 \ +1.2652790e+05 \ +1.2779450e+05 \ +1.2779600e+05 \ +1.2818630e+05 \ +1.3079280e+05 \ +1.2709070e+05 \ +1.2734030e+05 \ +1.2866540e+05 \ +1.3168930e+05 \ +1.2873960e+05 \ +1.2975590e+05 \ +1.3109510e+05 \ +1.2960900e+05 \ +1.2781750e+05 \ +1.2753540e+05 \ +1.3130090e+05 \ +1.2416470e+05 \ +1.3007430e+05 \ +1.3275300e+05 \ +1.3005980e+05 \ +1.2829450e+05 \ +1.2908610e+05 \ +# 1.3375430e+05 \ +1.2670200e+05 \ +1.2936750e+05 \ +1.3017880e+05 \ +1.2873960e+05 \ +1.3090600e+05 \ +1.3053030e+05 \ +1.3117950e+05 \ +1.2975590e+05 \ +1.3020320e+05 \ +1.2796490e+05 \ +1.2779620e+05 \ +1.2736210e+05 \ +1.1959590e+05 \ +# 1.1396910e+05 \ +# 1.1396910e+05 \ +1.1790970e+05 \ +1.1715980e+05 \ +1.1734460e+05 \ +1.1341210e+05 \ +1.1341210e+05 \ +1.1807200e+05 \ +1.1807200e+05 \ +1.1695140e+05 \ +1.1341210e+05 \ +1.1802350e+05 \ +1.1489550e+05 \ +1.1907530e+05 \ +1.1664990e+05 \ +1.1922250e+05 \ +1.1773580e+05 \ +1.1986170e+05 \ +1.1568570e+05 \ +1.1614830e+05 \ +1.1723060e+05 \ +1.1283980e+05 \ +1.1364450e+05 \ +1.1831100e+05 \ +1.1849970e+05 \ 
+1.1774340e+05 \ +1.1562490e+05 \ +1.1283980e+05 \ +1.1431820e+05 \ +1.1252680e+05 \ +1.1388580e+05 \ +1.1384060e+05 \ +1.1525700e+05 \ +1.1576820e+05 \ +1.1514410e+05 \ +1.1115070e+05 \ +1.1745460e+05 \ +1.1737020e+05 \ +1.1662430e+05 \ +1.1650450e+05 \ +1.1610970e+05 \ +1.1588600e+05 \ +1.1199540e+05 \ +1.1603400e+05 \ +1.1241150e+05 \ +1.1412750e+05 \ +1.1588600e+05 \ +1.1347970e+05 \ +1.1420590e+05 \ +1.1495170e+05 \ +1.1090080e+05 \ +1.1576820e+05 \ +1.1991850e+05 \ +1.1464830e+05 \ +1.1464640e+05 \ +1.1680060e+05 \ +1.1678730e+05 \ +1.1631280e+05 \ +1.1710390e+05 \ +1.1657440e+05 \ +1.1368470e+05 \ +1.1961160e+05 \ +1.1622180e+05 \ +1.1647750e+05 \ +1.1468810e+05 \ +1.1368120e+05 \ +1.1781110e+05 \ +1.1362110e+05 \ +1.1521670e+05 \ +1.1873260e+05 \ +# 1.1071300e+05 \ +1.1204520e+05 \ +1.1554260e+05 \ +1.1647630e+05 \ +1.1202550e+05 \ +1.1474230e+05 \ +1.1686870e+05 \ +1.1333190e+05 \ +1.1164440e+05 \ +1.1443710e+05 \ +1.1718020e+05 \ +1.1449630e+05 \ +1.1538740e+05 \ +1.1398560e+05 \ +1.1651420e+05 \ +1.1651420e+05 \ +1.1728040e+05 \ +1.1852620e+05 \ +1.1426630e+05 \ +1.1503790e+05 \ +1.1503790e+05 \ +1.1666410e+05 \ +1.1736870e+05 \ +1.1396490e+05 \ +1.1396490e+05 \ +1.1374370e+05 \ +1.1812540e+05 \ +1.1812540e+05 \ +1.1911780e+05 \ +1.1642240e+05 \ +1.1824950e+05 \ +1.1403450e+05 \ +1.1236660e+05 \ +1.1382540e+05 \ +1.1487530e+05 \ +1.1705490e+05 \ +1.1757980e+05 \ +1.1647670e+05 \ +1.1684130e+05 \ +1.1584930e+05 \ +1.1602050e+05 \ +1.1852470e+05 \ +1.1902300e+05 \ +1.1701250e+05 \ +1.1671090e+05 \ +1.1644280e+05 \ +1.1799680e+05 \ +1.1763330e+05 \ +1.1275860e+05 \ +1.1439690e+05 \ +1.1495170e+05 \ +1.1813450e+05 \ +1.1537170e+05 \ +1.1947600e+05 \ +1.1387770e+05 \ +# 1.2211900e+05 \ +1.2153840e+05 \ +1.1447740e+05 \ +1.1535850e+05 \ +1.1810680e+05 \ +1.1990290e+05 \ +1.1870510e+05 \ +1.1503400e+05 \ +1.2086150e+05 \ +1.1851880e+05 \ +1.1740300e+05 \ +1.1816010e+05 \ +1.1891880e+05 \ +1.1713100e+05 \ +1.1542130e+05 \ +1.1492430e+05 \ +1.1862740e+05 \ 
+1.1875310e+05 \ +1.1336300e+05 \ +# 1.2269160e+05 \ +# 1.1040910e+05 \ +1.1767430e+05 \ +1.1807200e+05 \ +1.1591380e+05 \ +1.1668990e+05 \ +1.1677200e+05 \ +1.1842940e+05 \ +1.1799510e+05 \ +# 1.1099130e+05 \ +1.1932500e+05 \ +1.1664080e+05 \ +1.1569340e+05 \ +1.1785460e+05 \ +1.1316120e+05 \ +1.1610320e+05 \ +1.1616630e+05 \ +# 1.2295830e+05 \ +1.1732420e+05 \ +1.1615560e+05 \ +1.1753620e+05 \ +1.1680500e+05 \ +1.1800550e+05 \ +1.1666320e+05 \ +1.1827000e+05 \ +1.1769470e+05 \ +1.1441700e+05 \ +1.1175420e+05 \ +1.1366460e+05 \ +1.1690940e+05 \ +1.1412140e+05 \ +1.1442540e+05 \ +1.1265740e+05 \ +1.1284820e+05 \ +1.1091210e+05 \ +# 1.0273490e+05 \ +# 8.6441490e+04 \ +# 8.6441490e+04 \ +# 6.9721540e+04 \ +# 6.4234380e+04 \ +# 6.4234380e+04 \ +# 8.1135480e+04 \ +# 8.1135480e+04 \ +# 1.0415760e+05 \ +# 1.0415760e+05 \ +1.0797020e+05 \ +# 1.0210710e+05 \ +1.0981200e+05 \ +1.0782720e+05 \ +1.0978700e+05 \ +1.0818520e+05 \ +1.1097080e+05 \ +1.0859530e+05 \ +1.1152380e+05 \ +1.1069850e+05 \ +1.1162450e+05 \ +1.1383430e+05 \ +1.1072110e+05 \ +# 1.0160150e+05 \ +1.1202530e+05 \ +1.0634370e+05 \ +1.1063900e+05 \ +1.0784530e+05 \ +1.1542130e+05 \ +1.0880460e+05 \ +1.1003850e+05 \ +# 1.0525750e+05 \ +1.1182500e+05 \ +1.0846200e+05 \ +1.0928740e+05 \ +1.1202570e+05 \ +1.1119230e+05 \ +1.0962190e+05 \ +1.0667740e+05 \ +1.1197570e+05 \ +1.1170470e+05 \ +1.1428110e+05 \ +1.0797830e+05 \ +1.1177950e+05 \ +1.1083070e+05 \ +1.1091130e+05 \ +1.1004720e+05 \ +1.1189460e+05 \ +1.1015610e+05 \ +# 1.1597990e+05 \ +1.1164850e+05 \ +1.1269610e+05 \ +1.0609670e+05 \ +1.1068020e+05 \ +1.1073950e+05 \ +1.1328330e+05 \ +1.0849360e+05 \ +1.1089960e+05 \ +1.1192580e+05 \ +1.0964570e+05 \ +1.1040910e+05 \ +1.1040910e+05 \ +1.1140700e+05 \ +1.1127180e+05 \ +1.1012980e+05 \ +1.1516460e+05 \ +1.1240810e+05 \ +1.1318450e+05 \ +# 1.0647570e+05 \ +1.1058870e+05 \ +1.1441190e+05 \ +1.1364340e+05 \ +1.1176970e+05 \ +1.1496610e+05 \ +1.0908600e+05 \ +1.1424080e+05 \ +1.0944610e+05 \ +1.1406830e+05 \ 
+1.1187480e+05 \ +1.1187480e+05 \ +1.1255850e+05 \ +1.1249730e+05 \ +1.0818520e+05 \ +1.1446560e+05 \ +1.1185480e+05 \ +1.1076070e+05 \ +1.0874580e+05 \ +1.1411550e+05 \ +1.1384560e+05 \ +1.1155560e+05 \ +1.0896610e+05 \ +1.1169420e+05 \ +1.1584410e+05 \ +1.1122680e+05 \ +1.1253850e+05 \ +1.1253850e+05 \ +1.1252760e+05 \ +1.1251850e+05 \ +1.1536380e+05 \ +1.0846200e+05 \ +1.1466020e+05 \ +1.1279980e+05 \ +1.0991490e+05 \ +1.1233720e+05 \ +1.1403660e+05 \ +1.1316120e+05 \ +1.1131160e+05 \ +1.1098400e+05 \ +1.1116170e+05 \ +1.1068780e+05 \ +1.1416790e+05 \ +1.1362280e+05 \ +1.1362280e+05 \ +1.1096120e+05 \ +1.0959800e+05 \ +1.0968970e+05 \ +1.0759050e+05 \ +1.0983590e+05 \ +1.0776770e+05 \ +1.1003480e+05 \ +1.0902210e+05 \ +1.1511490e+05 \ +1.1176380e+05 \ +1.0979640e+05 \ +1.0820000e+05 \ +1.0820000e+05 \ +1.1283980e+05 \ +1.0964160e+05 \ +1.1560960e+05 \ +1.1547120e+05 \ +1.1179950e+05 \ +1.0958530e+05 \ +1.1271030e+05 \ +1.1032670e+05 \ +1.0958970e+05 \ +1.0673340e+05 \ +1.0953730e+05 \ +1.1270370e+05 \ +1.0679630e+05 \ +1.0679630e+05 \ +1.1001180e+05 \ +1.1188930e+05 \ +1.1261850e+05 \ +1.1154320e+05 \ +1.1174410e+05 \ +1.0986420e+05 \ +1.1096030e+05 \ +1.0987540e+05 \ +1.1265740e+05 \ +1.1380670e+05 \ +1.1348260e+05 \ +1.1097460e+05 \ +1.1091210e+05 \ +# 1.1629720e+05 \ +# 1.1629720e+05 \ +1.1309330e+05 \ +1.0816820e+05 \ +1.1049060e+05 \ +1.0911370e+05 \ +1.1565780e+05 \ +# 1.1613650e+05 \ +1.1391190e+05 \ +1.1190450e+05 \ +1.1540900e+05 \ +1.0833100e+05 \ +1.1057960e+05 \ +1.1230760e+05 \ +1.1383820e+05 \ +1.0989510e+05 \ +1.0989510e+05 \ +1.1093110e+05 \ +1.1066950e+05 \ +1.0792940e+05 \ +1.1293990e+05 \ +1.0873260e+05 \ +1.1285050e+05 \ +1.1260890e+05 \ +1.1158310e+05 \ +1.1118290e+05 \ +1.1083870e+05 \ +1.1437020e+05 \ +1.0692730e+05 \ +1.1246260e+05 \ +1.0974440e+05 \ +1.1020880e+05 \ +1.1157370e+05 \ +1.0810080e+05 \ +1.0810080e+05 \ +1.0595190e+05 \ +1.0875770e+05 \ +1.1107200e+05 \ +1.0947720e+05 \ +1.0925970e+05 \ +1.0763510e+05 \ +1.0718900e+05 \ 
+1.0838230e+05 \ +1.0702720e+05 \ +1.0947720e+05 \ +1.1232670e+05 \ +1.1092220e+05 \ +1.1432260e+05 \ +1.1119430e+05 \ +1.1385020e+05 \ +1.0946460e+05 \ +1.0946460e+05 \ +1.1078560e+05 \ +1.0933960e+05 \ +1.1014950e+05 \ +1.0851560e+05 \ +1.0750430e+05 \ +1.1368350e+05 \ +1.1074090e+05 \ +1.1131230e+05 \ +1.0883790e+05 \ +1.1077910e+05 \ +1.1404250e+05 \ +1.1415900e+05 \ +1.1178500e+05 \ +1.1099060e+05 \ +1.1202550e+05 \ +1.1202550e+05 \ +1.0932060e+05 \ +1.0868990e+05 \ +1.1091210e+05 \ +1.1033400e+05 \ +1.0757670e+05 \ +1.0621390e+05 \ +1.0898650e+05 \ +1.0750430e+05 \ +1.0951780e+05 \ +1.0930090e+05 \ +1.1289990e+05 \ +1.0904670e+05 \ +1.0970100e+05 \ +1.0530200e+05 \ +1.0768800e+05 \ +1.0784010e+05 \ +1.0784010e+05 \ +# 1.1687510e+05 \ +1.0818520e+05 \ +1.1129860e+05 \ +1.0883610e+05 \ +1.0567990e+05 \ +1.0799570e+05 \ +1.1370440e+05 \ +1.1081100e+05 \ +1.0687400e+05 \ +1.0821130e+05 \ +1.1052730e+05 \ +1.1274910e+05 \ +1.1447740e+05 \ +1.1208630e+05 \ +1.1208660e+05 \ +1.1126310e+05 \ +1.1295850e+05 \ +1.1253850e+05 \ +1.1253850e+05 \ +1.1361220e+05 \ +1.1042730e+05 \ +1.0756410e+05 \ +1.0995260e+05 \ +1.0734620e+05 \ +1.0995960e+05 \ +1.0630780e+05 \ +1.0562090e+05 \ +1.1214760e+05 \ +1.0900220e+05 \ +1.0983730e+05 \ +# 1.0301380e+05 \ +1.0830400e+05 \ +1.1114190e+05 \ +1.1045950e+05 \ +1.1045950e+05 \ +1.0558880e+05 \ +1.0863220e+05 \ +1.1296920e+05 \ +1.1067110e+05 \ +1.1064970e+05 \ +1.0747900e+05 \ +1.0868990e+05 \ +1.0818520e+05 \ +1.1212610e+05 \ +1.0907620e+05 \ +1.0774650e+05 \ +1.0771450e+05 \ +1.0683550e+05 \ +1.0742630e+05 \ +1.0955030e+05 \ +1.0959580e+05 \ +1.0959930e+05 \ +1.0959930e+05 \ +1.0680850e+05 \ +1.1154360e+05 \ +1.0963600e+05 \ +1.0850120e+05 \ +1.1044870e+05 \ +1.0912900e+05 \ +1.0854720e+05 \ +1.0924210e+05 \ +1.1083070e+05 \ +1.1138750e+05 \ +1.0866600e+05 \ +1.0589740e+05 \ +1.0796360e+05 \ +1.0710370e+05 \ +1.1123400e+05 \ +1.1123400e+05 \ +1.0690680e+05 \ +1.0752410e+05 \ +1.0852770e+05 \ +1.0740470e+05 \ +1.1166460e+05 \ 
+1.1145210e+05 \ +1.1077650e+05 \ +1.1018900e+05 \ +1.1052960e+05 \ +1.0883610e+05 \ +1.0789150e+05 \ +1.0818520e+05 \ +1.0818520e+05 \ +1.1169420e+05 \ +1.1164440e+05 \ +1.0967000e+05 \ +1.1133340e+05 \ +1.1226710e+05 \ +1.0675740e+05 \ +1.0808050e+05 \ +1.0918920e+05 \ +1.0868230e+05 \ +1.1016060e+05 \ +1.1041730e+05 \ +1.1023730e+05 \ +1.1100740e+05 \ +1.1205520e+05 \ +1.1089150e+05 \ +1.0894350e+05 \ +1.1429490e+05 \ +1.1073030e+05 \ +1.1073030e+05 \ +1.1268820e+05 \ +1.1234690e+05 \ +1.1155380e+05 \ +1.1118290e+05 \ +1.1194590e+05 \ +1.0846200e+05 \ +1.1072110e+05 \ +1.1245130e+05 \ +1.1020010e+05 \ +1.1111590e+05 \ +1.1237900e+05 \ +1.0878910e+05 \ +1.0802250e+05 \ +1.1162490e+05 \ +1.1119430e+05 \ +1.1370130e+05 \ +1.1450100e+05 \ +1.1450100e+05 \ +# 1.1812230e+05 \ +1.1080960e+05 \ +1.1444040e+05 \ +# 1.1815400e+05 \ +# 1.1904360e+05 \ +1.1419230e+05 \ +1.1258890e+05 \ +1.1042890e+05 \ +1.1018690e+05 \ +# 1.0261100e+05 \ +# 1.0197200e+05 \ +1.0829650e+05 \ +1.0849990e+05 \ +1.0757670e+05 \ +1.1039700e+05 \ +1.1039700e+05 \ +1.1061010e+05 \ +1.0929220e+05 \ +1.1158740e+05 \ +1.1070490e+05 \ +1.0880860e+05 \ +1.1466440e+05 \ +# 1.0608290e+05 \ +1.1036820e+05 \ +1.0744000e+05 \ +1.0869760e+05 \ +1.0791110e+05 \ +1.0791110e+05 \ +1.1162290e+05 \ +1.1095090e+05 \ +1.1092270e+05 \ +1.1285990e+05 \ +1.0736560e+05 \ +1.1092440e+05 \ +1.1281500e+05 \ +1.0981110e+05 \ +1.1131120e+05 \ +1.1131120e+05 \ +1.1411910e+05 \ +1.1141230e+05 \ +1.1204570e+05 \ +1.1138710e+05 \ +1.1622860e+05 \ +1.0998970e+05 \ +1.1520510e+05 \ +1.0674870e+05 \ +1.1067860e+05 \ +1.1307980e+05 \ +1.1459510e+05 \ +1.1476880e+05 \ +1.0846530e+05 \ +1.0846530e+05 \ +1.1212650e+05 \ +1.1159270e+05 \ +1.1005780e+05 \ +1.1300740e+05 \ +1.1467210e+05 \ +1.1582680e+05 \ +1.1051720e+05 \ +1.0952450e+05 \ +1.1049910e+05 \ +1.0999660e+05 \ +1.1168400e+05 \ +1.1525340e+05 \ +1.1253860e+05 \ +1.1253860e+05 \ +1.1488790e+05 \ +1.1236680e+05 \ +1.1100850e+05 \ +1.0992360e+05 \ +1.0928080e+05 \ +# 
1.1714310e+05 \ +1.0933990e+05 \ +1.1592440e+05 \ +1.1480290e+05 \ +1.1036630e+05 \ +1.1081880e+05 \ +1.1274370e+05 \ +1.1319400e+05 \ +1.1554970e+05 \ +1.1477070e+05 \ +1.1203790e+05 \ +1.1166410e+05 \ +1.1166410e+05 \ +1.1700490e+05 \ +1.0949780e+05 \ +1.1426090e+05 \ +1.1268380e+05 \ +1.1615560e+05 \ +1.0789870e+05 \ +1.0969650e+05 \ +1.1244900e+05 \ +1.1007570e+05 \ +# 1.1850700e+05 \ +1.1076990e+05 \ +1.1090900e+05 \ +1.0995710e+05 \ +1.1080830e+05 \ +1.1195580e+05 \ +1.1462640e+05 \ +1.1438640e+05 \ +1.1438640e+05 \ +1.1154320e+05 \ +1.1533580e+05 \ +1.1014730e+05 \ +1.1441190e+05 \ +1.1024080e+05 \ +1.1440370e+05 \ +1.1171390e+05 \ +1.1021760e+05 \ +1.1579590e+05 \ +1.0967390e+05 \ +1.1306300e+05 \ +1.1419600e+05 \ +1.1133340e+05 \ +1.1376640e+05 \ +1.1223740e+05 \ +1.1223740e+05 \ +1.1493160e+05 \ +1.1195600e+05 \ +1.1410690e+05 \ +1.0886930e+05 \ +1.1400790e+05 \ +1.1542930e+05 \ +1.1351690e+05 \ +1.0941460e+05 \ +1.1422910e+05 \ +1.1127610e+05 \ +1.1457950e+05 \ +1.1445720e+05 \ +1.1322510e+05 \ +1.1377780e+05 \ +1.1409840e+05 \ +1.0810080e+05 \ +1.0810080e+05 \ +1.1242810e+05 \ +1.1226760e+05 \ +1.1375630e+05 \ +1.1069850e+05 \ +1.1242810e+05 \ +1.1362440e+05 \ +1.1015610e+05 \ +1.1470580e+05 \ +1.1637250e+05 \ +1.1391590e+05 \ +1.1181510e+05 \ +1.0784230e+05 \ +1.0939210e+05 \ +1.1310320e+05 \ +1.1423230e+05 \ +1.1167870e+05 \ +1.1167870e+05 \ +1.1271970e+05 \ +1.1562990e+05 \ +1.1137390e+05 \ +1.1058870e+05 \ +1.0996820e+05 \ +1.1014950e+05 \ +1.1376640e+05 \ +1.0737470e+05 \ +1.1095090e+05 \ +1.1337210e+05 \ +1.1180510e+05 \ +1.1459140e+05 \ +1.1071560e+05 \ +1.0993280e+05 \ +1.0910500e+05 \ +1.0926100e+05 \ +1.0926100e+05 \ +1.1230670e+05 \ +1.1230670e+05 \ +1.1260250e+05 \ +1.1325230e+05 \ +1.1089520e+05 \ +1.1322680e+05 \ +1.1112760e+05 \ +1.0999530e+05 \ +1.1071300e+05 \ +1.1266790e+05 \ +1.0827090e+05 \ +1.1086110e+05 \ +1.1097080e+05 \ +1.1295060e+05 \ +1.1380530e+05 \ +1.1454220e+05 \ +1.1257850e+05 \ +# 1.0484600e+05 \ +# 1.0484600e+05 \ 
+1.1033960e+05 \ +1.0835640e+05 \ +1.1380530e+05 \ +1.1097280e+05 \ +1.0826210e+05 \ +1.1133190e+05 \ +1.1052540e+05 \ +1.1073540e+05 \ +1.1216570e+05 \ +1.0736070e+05 \ +1.0837480e+05 \ +1.0908980e+05 \ +1.0859970e+05 \ +1.0889240e+05 \ +1.1027160e+05 \ +1.1027160e+05 \ +1.0914500e+05 \ +1.0914500e+05 \ +1.0670360e+05 \ +1.1094050e+05 \ +1.1163460e+05 \ +1.1088090e+05 \ +1.1285970e+05 \ +1.1217760e+05 \ +1.0909050e+05 \ +1.0942780e+05 \ +1.1203990e+05 \ +1.1055600e+05 \ +1.1237630e+05 \ +1.1190460e+05 \ +1.0939850e+05 \ +1.0939850e+05 \ +1.0950000e+05 \ +1.1042190e+05 \ +1.0890810e+05 \ +1.0958180e+05 \ +1.1020240e+05 \ +1.0876960e+05 \ +1.0950000e+05 \ +1.1305440e+05 \ +1.1123290e+05 \ +1.0828430e+05 \ +1.1021760e+05 \ +1.1003860e+05 \ +1.1420590e+05 \ +1.1279980e+05 \ +1.1268920e+05 \ +1.1268920e+05 \ +1.1168880e+05 \ +1.1154360e+05 \ +1.1300130e+05 \ +1.1221610e+05 \ +1.0785240e+05 \ +1.0632540e+05 \ +1.1261850e+05 \ +1.1152380e+05 \ +1.0989510e+05 \ +1.1047230e+05 \ +1.0983590e+05 \ +1.0977530e+05 \ +1.0904300e+05 \ +1.1041100e+05 \ +1.1256800e+05 \ +1.1331190e+05 \ +1.1331190e+05 \ +1.1144300e+05 \ +1.1481320e+05 \ +1.1230630e+05 \ +1.1281040e+05 \ +1.0843730e+05 \ +1.1470190e+05 \ +1.0898090e+05 \ +1.1168450e+05 \ +1.1274260e+05 \ +# 1.0437090e+05 \ +1.1293860e+05 \ +1.0918430e+05 \ +1.1050990e+05 \ +1.0897870e+05 \ +1.0880880e+05 \ +1.1009770e+05 \ +1.1009770e+05 \ +1.0840950e+05 \ +1.0664490e+05 \ +1.1117250e+05 \ +1.1094430e+05 \ +1.1226690e+05 \ +1.1467210e+05 \ +1.1059030e+05 \ +1.0885570e+05 \ +1.0990770e+05 \ +1.0695240e+05 \ +1.0986830e+05 \ +1.1385900e+05 \ +1.1385900e+05 \ +1.1108240e+05 \ +1.1258800e+05 \ +1.0935920e+05 \ +# 1.1703980e+05 \ +1.1030890e+05 \ +1.0701080e+05 \ +1.1225670e+05 \ +1.1160330e+05 \ +1.1163500e+05 \ +1.1364450e+05 \ +1.1364450e+05 \ +1.1356290e+05 \ +1.1126650e+05 \ +1.0898090e+05 \ +1.0979920e+05 \ +1.1339210e+05 \ +1.0988800e+05 \ +1.1217760e+05 \ +1.0980070e+05 \ +1.0709000e+05 \ +1.1085420e+05 \ +1.1085420e+05 \ 
+1.0916460e+05 \ +1.0816560e+05 \ +1.1411550e+05 \ +1.0934770e+05 \ +1.1288920e+05 \ +1.1281850e+05 \ +1.0983590e+05 \ +1.0974880e+05 \ +1.0951960e+05 \ +1.0982880e+05 \ +1.0939850e+05 \ +1.0939530e+05 \ +1.0725660e+05 \ +1.0701740e+05 \ +1.1258890e+05 \ +1.0936740e+05 \ +1.0936740e+05 \ +1.0963310e+05 \ +1.1248770e+05 \ +1.0940930e+05 \ +1.1159910e+05 \ +1.0765460e+05 \ +1.0927370e+05 \ +1.0756410e+05 \ +1.1372370e+05 \ +1.1133270e+05 \ +1.0664540e+05 \ +1.1493970e+05 \ +1.1014950e+05 \ +1.0983590e+05 \ +1.1165490e+05 \ +1.0865410e+05 \ +1.0865410e+05 \ +1.1260800e+05 \ +1.0993280e+05 \ +1.0806820e+05 \ +1.1442030e+05 \ +1.1224710e+05 \ +1.1044970e+05 \ +1.1484470e+05 \ +1.1602410e+05 \ +1.0986830e+05 \ +1.1349370e+05 \ +1.0954590e+05 \ +1.1302750e+05 \ +1.1158340e+05 \ +1.1103350e+05 \ +1.1173400e+05 \ +# 1.0638250e+05 \ +1.1346260e+05 \ +1.1346260e+05 \ +1.1482310e+05 \ +1.1126220e+05 \ +1.1130810e+05 \ +1.1056500e+05 \ +1.0993280e+05 \ +# 9.7978340e+04 \ +# 9.5090230e+04 \ +# 9.9824350e+04 \ +1.1071390e+05 \ +1.0914500e+05 \ +1.0922500e+05 \ +1.0827120e+05 \ +1.1122260e+05 \ +1.0984860e+05 \ +1.1449270e+05 \ +1.1223660e+05 \ +1.1398770e+05 \ +1.1217620e+05 \ +1.1217620e+05 \ +1.1360430e+05 \ +1.1175390e+05 \ +1.1200580e+05 \ +1.1403660e+05 \ +1.1547890e+05 \ +# 1.2005440e+05 \ +1.1273040e+05 \ +1.1375630e+05 \ +1.1510130e+05 \ +1.1210710e+05 \ +1.1629490e+05 \ +1.1263020e+05 \ +# 1.1722490e+05 \ +1.1270150e+05 \ +1.1519770e+05 \ +1.1068510e+05 \ +1.1524560e+05 \ +1.1524560e+05 \ +1.1171390e+05 \ +1.1015490e+05 \ +1.1306140e+05 \ +1.1268900e+05 \ +1.1322310e+05 \ +1.0771620e+05 \ +1.1434160e+05 \ +1.1203560e+05 \ +1.1225730e+05 \ +1.1363570e+05 \ +1.1157330e+05 \ +1.1341420e+05 \ +1.0743680e+05 \ +1.1032670e+05 \ +1.1071040e+05 \ +1.1071040e+05 \ +1.0986260e+05 \ +1.1376510e+05 \ +1.1343920e+05 \ +1.1099060e+05 \ +1.1232690e+05 \ +1.1276920e+05 \ +1.0986420e+05 \ +# 1.0661230e+05 \ +1.0804200e+05 \ +1.1339210e+05 \ +1.1050810e+05 \ +1.1052790e+05 \ 
+1.0871300e+05 \ +1.1104590e+05 \ +1.0791770e+05 \ +1.0940350e+05 \ +1.0940350e+05 \ +1.1191040e+05 \ +1.1270370e+05 \ +1.1241750e+05 \ +# 1.0334310e+05 \ +# 1.0510780e+05 \ +1.1065140e+05 \ +1.1043970e+05 \ +1.1137250e+05 \ +1.0995960e+05 \ +1.0788640e+05 \ +1.1351070e+05 \ +1.0718070e+05 \ +1.0887530e+05 \ +1.0797620e+05 \ +1.1296920e+05 \ +1.0936410e+05 \ +1.0987680e+05 \ +1.1054040e+05 \ +1.1342110e+05 \ +1.1342110e+05 \ +1.1174410e+05 \ +1.0899450e+05 \ +1.1377860e+05 \ +1.1319100e+05 \ +1.0656650e+05 \ +1.1154410e+05 \ +1.1321320e+05 \ +1.1001020e+05 \ +1.0575860e+05 \ +1.1195560e+05 \ +1.1045060e+05 \ +1.1161340e+05 \ +1.1059030e+05 \ +1.0773400e+05 \ +1.0735390e+05 \ +1.0809850e+05 \ +1.0809850e+05 \ +1.0747220e+05 \ +1.1160330e+05 \ +1.1054040e+05 \ +1.0736790e+05 \ +1.0760300e+05 \ +1.0906450e+05 \ +1.1035580e+05 \ +1.0680300e+05 \ +1.1218590e+05 \ +1.1151410e+05 \ +1.0943970e+05 \ +1.0943970e+05 \ +1.1056170e+05 \ +1.0758350e+05 \ +1.0821970e+05 \ +1.1014090e+05 \ +1.0984010e+05 \ +1.0803990e+05 \ +1.0991890e+05 \ +1.1087250e+05 \ +1.0827960e+05 \ +1.1188490e+05 \ +1.0962940e+05 \ +1.0677000e+05 \ +1.1105210e+05 \ +1.1006930e+05 \ +1.0874860e+05 \ +1.0931980e+05 \ +1.0931980e+05 \ +1.0633880e+05 \ +1.0794410e+05 \ +1.0706740e+05 \ +1.0955350e+05 \ +1.0753440e+05 \ +1.0779800e+05 \ +1.0817220e+05 \ +# 1.0423620e+05 \ +1.0796980e+05 \ +1.1031640e+05 \ +1.1037020e+05 \ +# 1.0271120e+05 \ +1.0965030e+05 \ +1.1123390e+05 \ +1.0885570e+05 \ +1.0726330e+05 \ +1.0869760e+05 \ +1.0869760e+05 \ +1.0952610e+05 \ +1.0992160e+05 \ +1.1051890e+05 \ +1.1105210e+05 \ +1.0917630e+05 \ +1.1063310e+05 \ +1.1227660e+05 \ +1.1356180e+05 \ +1.0893550e+05 \ +1.0992580e+05 \ +1.1152840e+05 \ +1.1260800e+05 \ +1.1183490e+05 \ +1.1057230e+05 \ +1.0853520e+05 \ +1.0853520e+05 \ +1.0817300e+05 \ +1.1013850e+05 \ +1.1475050e+05 \ +1.1388310e+05 \ +1.1218590e+05 \ +1.1487300e+05 \ +1.1327230e+05 \ +1.1290130e+05 \ +1.1369060e+05 \ +1.1083310e+05 \ +1.1438240e+05 \ +1.1529530e+05 \ 
+1.1246080e+05 \ +1.1246080e+05 \ +1.1402090e+05 \ +1.1258510e+05 \ +1.0767410e+05 \ +# 1.0482600e+05 \ +# 9.8821820e+04 \ +# 1.0339850e+05 \ +1.1228640e+05 \ +1.1258800e+05 \ +1.0983740e+05 \ +1.0983740e+05 \ +1.1155380e+05 \ +1.1047750e+05 \ +1.0915660e+05 \ +1.0801180e+05 \ +1.1406580e+05 \ +1.1215210e+05 \ +1.1172840e+05 \ +1.1382140e+05 \ +1.1249800e+05 \ +1.0801000e+05 \ +1.1129290e+05 \ +1.1129290e+05 \ +1.1159320e+05 \ +# 1.0588190e+05 \ +1.0922560e+05 \ +1.1347940e+05 \ +1.1254890e+05 \ +1.1542460e+05 \ +1.1500790e+05 \ +1.1158310e+05 \ +1.1343430e+05 \ +1.1236760e+05 \ +1.1573140e+05 \ +1.1248900e+05 \ +1.1671730e+05 \ +1.1671730e+05 \ +1.0861260e+05 \ +1.0861260e+05 \ +1.1058480e+05 \ +1.1615690e+05 \ +1.1156320e+05 \ +1.1519720e+05 \ +1.1006690e+05 \ +# 1.1735740e+05 \ +1.1376710e+05 \ +1.1618800e+05 \ +1.1262890e+05 \ +1.1051720e+05 \ +1.1244810e+05 \ +1.0831140e+05 \ +# 1.1809870e+05 \ +# 1.1659370e+05 \ +1.0894350e+05 \ +1.1271970e+05 \ +1.1271970e+05 \ +1.1279040e+05 \ +1.1556000e+05 \ +1.0779960e+05 \ +1.1303980e+05 \ +1.1155800e+05 \ +1.1370480e+05 \ +1.1075010e+05 \ +1.1263970e+05 \ +1.1364280e+05 \ +1.0863880e+05 \ +1.1069090e+05 \ +1.1135330e+05 \ +1.1200990e+05 \ +1.0780760e+05 \ +1.1094670e+05 \ +1.0901790e+05 \ +1.0901790e+05 \ +1.0955890e+05 \ +1.1372130e+05 \ +1.1127260e+05 \ +1.1190460e+05 \ +1.0499120e+05 \ +1.0701080e+05 \ +1.0896690e+05 \ +1.1008670e+05 \ +1.0710340e+05 \ +1.0898260e+05 \ +1.0644890e+05 \ +1.0974880e+05 \ +1.0544620e+05 \ +1.0865080e+05 \ +1.0838810e+05 \ +1.1002990e+05 \ +1.1002990e+05 \ +1.0489380e+05 \ +# 1.0286260e+05 \ +# 1.0265780e+05 \ +1.0591670e+05 \ +1.0751010e+05 \ +1.1107310e+05 \ +1.0876540e+05 \ +1.1160560e+05 \ +1.0863130e+05 \ +1.0398360e+05 \ +1.0798140e+05 \ +1.0840160e+05 \ +1.0610990e+05 \ +1.1089150e+05 \ +1.1053810e+05 \ +1.1054700e+05 \ +1.0944660e+05 \ +1.0944660e+05 \ +1.0991890e+05 \ +1.0988230e+05 \ +1.0543700e+05 \ +1.0565950e+05 \ +1.0824420e+05 \ +1.0748390e+05 \ +1.0880100e+05 \ 
+1.0559080e+05 \ +1.0618890e+05 \ +1.0559620e+05 \ +1.0275770e+05 \ +1.0711270e+05 \ +1.0769360e+05 \ +1.0689000e+05 \ +1.0764080e+05 \ +1.0594050e+05 \ +1.0802760e+05 \ +1.0802760e+05 \ +1.0753680e+05 \ +1.0397330e+05 \ +1.0790330e+05 \ +1.0967310e+05 \ +1.0583710e+05 \ +1.0864210e+05 \ +# 1.0190030e+05 \ +1.0682560e+05 \ +1.0307960e+05 \ +1.0877730e+05 \ +1.0542760e+05 \ +1.0645560e+05 \ +1.0764080e+05 \ +1.1147510e+05 \ +1.0614280e+05 \ +1.0670360e+05 \ +1.0670360e+05 \ +# 1.1378050e+05 \ +1.0809850e+05 \ +1.0900220e+05 \ +1.0667550e+05 \ +# 1.0227900e+05 \ +1.0339770e+05 \ +1.0858460e+05 \ +1.0742340e+05 \ +1.0868120e+05 \ +1.0802040e+05 \ +1.0347610e+05 \ +1.0809850e+05 \ +1.0650230e+05 \ +1.0960470e+05 \ +1.0544690e+05 \ +1.0365350e+05 \ +1.0365350e+05 \ +1.0893950e+05 \ +1.0844200e+05 \ +1.0848100e+05 \ +1.1186490e+05 \ +1.0434330e+05 \ +1.0578480e+05 \ +1.0633880e+05 \ +1.0833080e+05 \ +1.0542760e+05 \ +1.0886350e+05 \ +1.0730250e+05 \ +1.0578120e+05 \ +1.0971230e+05 \ +1.0550020e+05 \ +1.1160560e+05 \ +1.0996230e+05 \ +1.1070000e+05 \ +1.0872900e+05 \ +1.0872900e+05 \ +1.0577660e+05 \ +1.1001020e+05 \ +1.0858770e+05 \ +1.1187510e+05 \ +1.0840160e+05 \ +1.0753770e+05 \ +1.0788970e+05 \ +1.0868990e+05 \ +1.0677370e+05 \ +1.1078810e+05 \ +1.1033960e+05 \ +1.1033960e+05 \ +1.1102430e+05 \ +1.1102430e+05 \ +1.0856970e+05 \ +1.1304240e+05 \ +1.0884790e+05 \ +1.0819740e+05 \ +1.1483510e+05 \ +1.0821700e+05 \ +1.1036460e+05 \ +1.1007370e+05 \ +1.1248690e+05 \ +1.0912180e+05 \ +1.1470190e+05 \ +1.1455930e+05 \ +1.1395610e+05 \ +1.1395610e+05 \ +1.1189470e+05 \ +1.1285990e+05 \ +1.1352490e+05 \ +1.1106090e+05 \ +1.1365920e+05 \ +1.1483260e+05 \ +# 1.1977260e+05 \ +1.1501370e+05 \ +# 1.1609030e+05 \ +1.0785470e+05 \ +1.0698500e+05 \ +# 9.9273530e+04 \ +# 9.2734640e+04 \ +# 9.7014920e+04 \ +1.0931520e+05 \ +1.0777850e+05 \ +1.0872490e+05 \ +1.0872490e+05 \ +1.1018690e+05 \ +1.1305980e+05 \ +1.1202550e+05 \ +1.1430650e+05 \ +1.1480910e+05 \ +1.1070000e+05 \ 
+1.1443710e+05 \ +1.1100190e+05 \ +1.1151830e+05 \ +1.1252680e+05 \ +1.1266380e+05 \ +1.1232810e+05 \ +1.1336020e+05 \ +1.0952110e+05 \ +1.0853520e+05 \ +1.0609830e+05 \ +1.0609830e+05 \ +1.1301850e+05 \ +1.1141290e+05 \ +1.1185480e+05 \ +1.1271770e+05 \ +1.1201560e+05 \ +1.1218590e+05 \ +1.0720360e+05 \ +1.0905850e+05 \ +1.0974880e+05 \ +1.0813440e+05 \ +1.0642360e+05 \ +1.0610410e+05 \ +1.0610410e+05 \ +1.0717090e+05 \ +1.1123290e+05 \ +1.0768800e+05 \ +1.0755040e+05 \ +1.1099130e+05 \ +1.0747900e+05 \ +1.0924950e+05 \ +1.0927020e+05 \ +1.1192030e+05 \ +1.0858730e+05 \ +1.0510430e+05 \ +1.0510430e+05 \ +1.1125270e+05 \ +1.1054940e+05 \ +1.1269870e+05 \ +1.0930090e+05 \ +1.1079200e+05 \ +# 1.1395610e+05 \ +# 1.1349370e+05 \ +# 1.1407530e+05 \ +# 1.0117850e+05 \ +# 9.4764450e+04 \ +# 8.8971340e+04 \ +# 8.1817020e+04 \ +# 8.1817020e+04 \ +# 7.9514520e+04 \ +# 7.4658690e+04 \ +# 7.2906820e+04 \ +# 7.4702950e+04 \ +# 7.2563320e+04 \ +# 6.7170910e+04 \ +# 6.7964210e+04 \ +# 6.7629860e+04 \ +# 6.9597940e+04 \ +# 7.0134870e+04 \ +# 7.0199210e+04 \ +# 7.1093280e+04 \ +# 6.9435120e+04 \ +# 6.9435120e+04 \ +# 8.1320570e+04 \ +# 8.8663800e+04 \ +# 9.9580970e+04 \ +# 1.0150090e+05 \ +1.0791760e+05 \ +1.1064540e+05 \ +# 1.1497650e+05 \ +1.1346340e+05 \ +1.1254810e+05 \ +1.0825000e+05 \ +1.0935310e+05 \ +1.0720080e+05 \ +1.1206510e+05 \ +1.1062430e+05 \ +1.0686120e+05 \ +1.1033070e+05 \ +1.1033070e+05 \ +1.0555570e+05 \ +1.0776600e+05 \ +# 1.1683440e+05 \ +1.1079200e+05 \ +1.1241750e+05 \ +1.1265840e+05 \ +1.1076490e+05 \ +1.1031980e+05 \ +1.0935590e+05 \ +1.0767700e+05 \ +1.1007240e+05 \ +1.1171850e+05 \ +1.1143270e+05 \ +1.0930700e+05 \ +1.1385830e+05 \ +1.1262940e+05 \ +1.0857760e+05 \ +1.0857760e+05 \ +1.0823920e+05 \ +1.1111390e+05 \ +1.1008040e+05 \ +1.1224690e+05 \ +1.1075160e+05 \ +1.1323960e+05 \ +# 1.1736210e+05 \ +1.1331190e+05 \ +1.1172420e+05 \ +1.1311980e+05 \ +# 1.1549760e+05 \ +1.0967000e+05 \ +1.1381170e+05 \ +1.1115070e+05 \ +1.1041100e+05 \ +1.1150860e+05 \ 
+1.1150860e+05 \ +1.0814330e+05 \ +1.1372370e+05 \ +1.1146280e+05 \ +1.0738150e+05 \ +1.0870530e+05 \ +1.0835520e+05 \ +1.0981760e+05 \ +1.1172420e+05 \ +1.1200550e+05 \ +1.1167440e+05 \ +1.1086110e+05 \ +1.1096970e+05 \ +1.1159320e+05 \ +1.1171400e+05 \ +1.1287050e+05 \ +1.1117250e+05 \ +1.0920190e+05 \ +1.0920190e+05 \ +1.1162450e+05 \ +1.0885180e+05 \ +1.0977690e+05 \ +1.0912530e+05 \ +1.0767220e+05 \ +1.0599510e+05 \ +1.0752750e+05 \ +# 1.0387140e+05 \ +1.0744370e+05 \ +# 1.0192100e+05 \ +# 9.9309860e+04 \ +# 1.0482680e+05 \ +# 9.9200820e+04 \ +# 1.0169960e+05 \ +# 1.0471490e+05 \ +# 1.0405560e+05 \ +1.0887530e+05 \ +1.1092060e+05 \ +1.1029800e+05 diff --git a/src/openalea/sequence_analysis/data/wij1.hsc b/src/openalea/sequence_analysis/data/wij1.hsc new file mode 100644 index 0000000..998c950 --- /dev/null +++ b/src/openalea/sequence_analysis/data/wij1.hsc @@ -0,0 +1,89 @@ +HIDDEN_SEMI-MARKOV_CHAIN + +8 STATES + +INITIAL_PROBABILITIES +0.5 0.5 0.0 0.0 0.0 0.0 0.0 0.0 + +TRANSITION_PROBABILITIES +0.0 0.4 0.3 0.3 0.0 0.0 0.0 0.0 +0.0 0.0 0.4 0.3 0.3 0.0 0.0 0.0 +0.0 0.0 0.0 0.4 0.3 0.3 0.0 0.0 +0.0 0.0 0.0 0.0 0.4 0.3 0.3 0.0 +0.0 0.0 0.0 0.0 0.0 0.4 0.3 0.3 +0.0 0.0 0.0 0.0 0.0 0.0 0.5 0.5 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 +0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 + +STATE 0 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 1 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 2 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 3 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 4 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 5 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +STATE 6 OCCUPANCY_DISTRIBUTION +NEGATIVE_BINOMIAL INF_BOUND : 1 PARAMETER : 1 PROBABILITY : 0.05 + +1 OUTPUT_PROCESS 
+ +OUTPUT_PROCESS 1 : NONPARAMETRIC + +STATE 0 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 + +STATE 1 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 2 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 3 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.1 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 4 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.3 +OUTPUT 1 : 0.1 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.2 +OUTPUT 4 : 0.2 + +STATE 5 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.2 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.3 +OUTPUT 4 : 0.1 + +STATE 6 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 0.2 +OUTPUT 1 : 0.3 +OUTPUT 2 : 0.2 +OUTPUT 3 : 0.1 +OUTPUT 4 : 0.2 + +STATE 7 OBSERVATION_DISTRIBUTION +OUTPUT 0 : 1.0 diff --git a/src/openalea/sequence_analysis/data/wij1.seq b/src/openalea/sequence_analysis/data/wij1.seq new file mode 100644 index 0000000..7b852e4 --- /dev/null +++ b/src/openalea/sequence_analysis/data/wij1.seq @@ -0,0 +1,75 @@ +1 VARIABLE + +VARIABLE 1 : INT # 5 values + +# value histogram - size of the sample: 1088 +# mean: 1.40625 variance: 1.49258 standard deviation: 1.22171 + +# | value histogram +# 0 284 latent bud +# 1 440 short shoot +# 2 36 long shoot +# 3 294 fruiting shoot +# 4 34 immediate shoot + +# sequence length histogram - size of the sample: 19 +# mean: 57.2632 variance: 8.64912 standard deviation: 2.94094 + +# cumulative length: 1088 + +1 1 2 1 1 1 1 1 1 1 3 1 3 0 3 3 3 0 3 3 1 0 0 4 4 0 4 3 3 3 3 0 3 3 1 3 3 1 3 0 0 \ +3 0 0 0 0 0 0 0 0 0 0 + +3 1 1 1 1 1 1 1 1 1 3 3 3 1 3 3 3 4 1 3 3 3 3 3 3 3 0 0 3 3 3 3 3 3 3 3 2 3 3 1 3 \ +1 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 + +1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 3 0 0 0 0 0 0 0 0 0 4 4 4 4 3 4 3 3 3 3 3 3 1 1 \ +1 1 1 1 0 0 3 0 1 0 0 0 0 0 0 + +1 1 2 1 1 1 1 1 1 1 1 1 3 1 1 1 4 3 1 3 3 3 3 3 3 3 0 0 3 3 0 3 0 3 3 3 3 1 0 1 1 \ +2 2 2 2 1 1 1 1 2 1 0 1 0 0 0 0 0 0 + +0 0 0 2 1 1 1 1 3 1 1 1 
3 3 3 3 3 3 3 0 3 0 3 0 3 3 4 3 0 0 0 3 1 1 1 0 3 3 3 3 3 \ +3 1 1 1 2 1 0 1 0 4 0 0 0 0 0 0 0 + +1 1 1 2 1 1 1 1 1 1 1 1 1 3 1 3 1 0 3 0 0 3 3 0 0 0 3 3 3 0 0 0 0 0 1 3 3 3 3 3 3 \ +3 3 3 3 1 1 1 1 3 3 0 0 0 0 0 0 0 0 0 0 + +0 0 1 1 1 1 1 1 1 1 1 1 3 1 1 3 3 3 3 1 3 0 0 0 3 3 1 0 0 1 1 1 1 3 3 3 3 1 1 1 1 \ +1 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 + +0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 4 1 3 3 0 3 3 3 1 3 3 3 0 3 3 1 0 1 3 3 0 3 0 3 \ +1 1 2 1 1 1 1 2 1 1 1 1 1 0 0 0 0 0 + +1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 3 3 3 3 1 3 3 3 1 3 3 4 0 1 3 3 3 1 1 3 3 3 3 1 3 1 \ +1 1 1 1 0 1 1 0 3 3 2 1 0 0 0 0 0 0 + +2 1 1 1 1 1 1 1 3 1 3 3 3 3 1 0 3 1 1 0 0 0 0 4 0 1 0 1 3 3 3 3 1 3 1 1 3 0 1 1 1 \ +1 1 0 1 0 0 0 0 0 0 0 0 + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 4 1 1 4 1 1 1 1 1 1 1 3 1 1 1 0 1 1 1 \ +1 1 2 1 1 1 1 3 2 0 0 0 0 0 0 0 0 0 + +1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 1 1 3 3 4 0 1 1 4 3 4 0 3 3 1 1 1 3 3 3 3 3 2 0 3 0 \ +1 1 1 1 2 2 1 1 1 1 2 1 1 1 0 0 0 0 + +0 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 3 1 1 3 1 1 4 1 1 1 3 3 0 1 3 3 3 3 3 3 1 1 \ +1 1 2 2 1 2 2 1 2 0 2 2 1 0 0 0 0 0 0 0 0 + +0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 3 0 0 4 0 3 4 4 3 3 4 3 0 0 3 3 3 1 3 3 3 1 1 2 1 1 \ +1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 + +0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 3 4 0 1 0 0 3 4 1 0 3 1 1 0 3 3 0 3 3 1 3 1 1 \ +1 1 1 1 1 1 1 2 1 1 1 2 0 0 0 0 0 0 0 + +2 1 1 1 1 1 1 1 1 1 1 1 4 1 1 3 1 1 1 1 3 3 1 3 3 1 0 1 3 3 1 3 3 3 3 3 3 0 1 3 3 \ +3 3 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 + +1 1 1 1 1 1 0 1 3 1 3 2 3 1 3 3 3 3 3 4 3 0 0 0 0 0 3 0 3 3 3 3 3 3 3 3 3 3 3 3 3 \ +1 3 1 0 0 0 0 0 0 0 0 + +0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 3 1 3 3 0 0 4 0 0 0 3 0 3 3 3 3 3 3 3 3 3 \ +3 0 1 1 1 0 0 0 0 0 0 0 0 0 0 + +1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 0 3 0 0 0 0 0 4 3 0 0 3 0 3 3 0 3 3 3 3 0 3 3 3 3 \ +1 0 0 0 0 0 0 0 0 0 0 0 diff --git a/src/wrapper/CMakeLists.txt b/src/wrapper/CMakeLists.txt new file mode 100644 index 0000000..5382584 --- /dev/null +++ b/src/wrapper/CMakeLists.txt @@ -0,0 +1,8 @@ +file(GLOB SOURCES "*.cpp") + + 
+target_sources( + _sequence_analysis + PRIVATE + ${SOURCES} +) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/_test_variable_order_markov.py b/test/_test_variable_order_markov.py index 243b339..6abf1ad 100644 --- a/test/_test_variable_order_markov.py +++ b/test/_test_variable_order_markov.py @@ -19,8 +19,8 @@ import openalea.stat_tool.plot #import DISABLE_PLOT openalea.stat_tool.plot.DISABLE_PLOT = True -from tools import interface -from tools import runTestClass +from .tools import interface +from .tools import runTestClass from openalea.sequence_analysis import get_shared_data diff --git a/test/functional1.py b/test/functional1.py deleted file mode 100644 index ef057bb..0000000 --- a/test/functional1.py +++ /dev/null @@ -1,97 +0,0 @@ -""" functional tests - - -.. todo:: to be done -""" -__revision__ = "$Id$" - -import os -from openalea.stat_tool import * -from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data - -seq1 = Sequences(str(get_shared_data( 'dupreziana_20a2.seq'))) # correct -seq2 = RemoveRun(seq1, 1, 0, "End") # correct - -histo21 = ExtractHistogram(seq2, "Recurrence", 1) # correct -histo22 = ExtractHistogram(seq2, "Recurrence", 2) # correct - -seq3 = Sequences(str(get_shared_data( 'dupreziana_40a2.seq'))) #correct -seq4_0 = RemoveRun(seq3, 2, 0, "End") #correct -seq4 = SegmentationExtract(seq4_0, 1, 2) #correct - - -seq5 = Sequences(str(get_shared_data( 'dupreziana_60a2.seq'))) #correct -seq6_0 = RemoveRun(seq5, 2, 0, "End") #correct -seq6 = LengthSelect(SegmentationExtract(seq6_0, 1, 2), 1, Mode="Reject") #correct - - -seq7 = Sequences(str(get_shared_data( 'dupreziana_80a2.seq'))) #correct -seq8_0 = RemoveRun(seq7, 2, 0, "End") #correct -seq8 = SegmentationExtract(seq8_0, 1, 2) #correct - - -seq10 = Merge(seq2, seq4, seq6, seq8) - -seq10_1 = RecurrenceTimeSequences(seq10,1) -seq10_2 = RecurrenceTimeSequences(seq10,2) - - -vec10 = 
MergeVariable(ExtractVectors(seq10, "Length"),ExtractVectors(seq10, "NbOccurrence",1,1),ExtractVectors(seq10, "NbOccurrence",1,2),ExtractVectors(seq10, "Cumul")) - -seq11 = Transcode(seq10, [0, 1, 0]) -seq12 = Transcode(seq10, [0, 0, 1]) - -acf1 = Merge(ComputeCorrelation(seq11, MaxLag=15), ComputeCorrelation(seq12, MaxLag=15)) - -acf2 = Merge(ComputeCorrelation(seq11, Type="Spearman", MaxLag=15), - ComputeCorrelation(seq12, Type="Spearman", MaxLag=15)) -acf3 = Merge(ComputeCorrelation(seq11, Type="Kendall", MaxLag=15), - ComputeCorrelation(seq12, Type="Kendall", MaxLag=15)) - - - -WordCount(seq10, 3, BeginState=1, EndState=1, MinFrequency=10) -WordCount(seq10, 4, BeginState=2, EndState=2) -WordCount(seq10, 4, BeginState=2, EndState=1) - - - -mc10 = Estimate(seq10, "VARIABLE_ORDER_MARKOV", "Ordinary", MaxOrder=5, GlobalInitialTransition=True) - -mc11 = Estimate(seq10, "VARIABLE_ORDER_MARKOV", "Ordinary", MaxOrder=5, GlobalInitialTransition=False) - -Plot(mc11, "Intensity") - -mc12 = Estimate(seq10, "VARIABLE_ORDER_MARKOV", "Ordinary", Algorithm="LocalBIC", Threshold=10., MaxOrder=5, GlobalInitialTransition=False, GlobalSample=False) -mc13 = Estimate(seq10, "VARIABLE_ORDER_MARKOV", "Ordinary", Algorithm="Context", Threshold=1., MaxOrder=5, GlobalInitialTransition=False, GlobalSample=False) - -acf11 = ComputeAutoCorrelation(mc11, 1, MaxLag=20) -acf12 = ComputeAutoCorrelation(mc11, 2, MaxLag=20) - -mc2 = Estimate(seq2, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) -mc4 = Estimate(seq4, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) -mc6 = Estimate(seq6, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) -mc8 = Estimate(seq8, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) - - -#TODO compare functions crashes sometimes -#matrix1 = Compare(Thresholding(mc2, MinProbability=0.001), seq10, Thresholding(mc4, MinProbability=0.001), seq10, Thresholding(mc6, MinProbability=0.001), seq10, Thresholding(mc8, 
MinProbability=0.001), seq10, 10000) -#matrix2 = Compare(Thresholding(mc2, MinProbability=0.001), seq2, Thresholding(mc4, MinProbability=0.001), seq4, Thresholding(mc6, MinProbability=0.001), seq6, Thresholding(mc8, MinProbability=0.001), seq8, 10000) - - -#Compare(seq10, Thresholding(mc2, MinProbability=0.001), Thresholding(mc4, MinProbability=.001), Thresholding(mc6, MinProbability=0.001), Thresholding(mc8, MinProbability=0.001)) - - - -# test # -hmc9 = HiddenVariableOrderMarkov(str(get_shared_data( "dupreziana21.hc"))) -hmc10 = Estimate(seq10, "HIDDEN_VARIABLE_ORDER_MARKOV", hmc9, GlobalInitialTransition=True, NbIteration=80) -hmc11 = Estimate(seq10, "HIDDEN_VARIABLE_ORDER_MARKOV", hmc9, GlobalInitialTransition=False, NbIteration=80) - - -acf21 = ComputeAutoCorrelation(hmc11, 1, 1, MaxLag=20) -acf22 = ComputeAutoCorrelation(hmc11, 1, 2, MaxLag=20) - -seq15 = Simulate(hmc11, 10000, seq10) - diff --git a/test/functional2.py b/test/functional2.py deleted file mode 100644 index f467d2c..0000000 --- a/test/functional2.py +++ /dev/null @@ -1,141 +0,0 @@ -""" functional tests - -""" -__revision__ = "$Id$" - - - -from openalea.sequence_analysis import * -from openalea.sequence_analysis.estimate import Estimate -from openalea.sequence_analysis.compare import Compare -from tools import runTestClass, robust_path as get_shared_data - - -seq20 = Sequences(str(get_shared_data("belren1.seq"))) -seq21 = Sequences(str(get_shared_data("elstar1.seq"))) -seq22 = Sequences(str(get_shared_data("fuji1.seq"))) -seq23 = Sequences(str(get_shared_data("gala1.seq"))) -seq24 = Sequences(str(get_shared_data("granny1.seq"))) -seq25 = Sequences(str(get_shared_data("reinet1.seq"))) -seq26 = Sequences(str(get_shared_data("wij1.seq"))) - -Display(seq25, ViewPoint="Data") -Plot(seq25, "Intensity") -Plot(seq25, "Sojourn") - -seq26 = Reverse(seq25) -Plot(seq26, "Intensity") -Plot(seq26, "FirstOccurrence") - -# Sojourn time (run length) distributions - -seq30 = Merge(seq20, seq21, seq22, seq23, 
seq24, seq25) -Plot(seq30, "Sojourn") -Plot(ExtractHistogram(seq30, "Sojourn", 1), ExtractHistogram(seq30, "Sojourn", 2), ExtractHistogram(seq30, "Sojourn", 3), ExtractHistogram(seq30, "Sojourn", 4)) - -mc30 = Estimate(seq30, "VARIABLE_ORDER_MARKOV", "Ordinary", MaxOrder=4, GlobalInitialTransition=False) -mc30 = Estimate(seq30, "VARIABLE_ORDER_MARKOV", "Ordinary", MaxOrder=4, Algorithm="BIC", GlobalInitialTransition=False) -#todo empty plot -#Plot(mc30, "Sojourn") -Display(Estimate(seq30, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=1)) -Display(Estimate(seq30, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=2, GlobalInitialTransition=False)) - -seq31 = Cluster(seq30, "Limit", [1, 4]) -mc31 = Estimate(seq30, "VARIABLE_ORDER_MARKOV", "Ordinary", MaxOrder=4, GlobalInitialTransition=False) -mc31 = Estimate(seq31, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=2, GlobalInitialTransition=False) -Plot(mc31, "Sojourn") -Display(Estimate(seq31, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=1)) - -# comparison of sequences by dynamic programming algorithms - -seq32 = Merge(seq20, seq25) -matrix30 = Compare(seq32) -matrix31 = Compare(seq32, VectorDistance("S")) -matrix32 = Compare(seq32, VectorDistance("S"), Transposition=True) -matrix33 = Compare(seq32, VectorDistance(str(get_shared_data("test_align1.a"))), Transposition=True) - -Display(Clustering(matrix33, "Partition", 2)) -Clustering(matrix33, "Hierarchy", Algorithm="Agglomerative") -Clustering(matrix33, "Hierarchy", Algorithm="Divisive") - -# multiple alignment - -seq33 = Compare(SelectIndividual(seq25, [10, 11, 12, 14, 15]), VectorDistance("S"), Output="Sequences", Algorithm="Agglomerative") -seq34 = Compare(SelectIndividual(seq25, [10, 11, 12, 14, 15]), VectorDistance("S"), Output="Sequences", Algorithm="Divisive") -seq35 = Compare(SelectIndividual(seq25, [10, 11, 12, 14, 15]), VectorDistance("S"), Output="Sequences", Algorithm="Ordering") - -Compare(seq25, TestSequence=9, RefSequence=1) -Compare(seq25, VectorDistance("S"), 
TestSequence=9, RefSequence=1) -Compare(seq25, VectorDistance("S"), TestSequence=9, RefSequence=1, Transposition=True) - -# multiple change-point models - -Display(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile") -Display(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile", Output="ChangePoint") -Plot(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile") -Plot(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile", Output="ChangePoint") -# hidden semi-Markov chains - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("belren1.hsc"))) -hsmc20 = Estimate(seq20, "HIDDEN_SEMI-MARKOV", hsmc0) - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("elstar1.hsc"))) -hsmc21 = Estimate(seq21, "HIDDEN_SEMI-MARKOV", hsmc0) - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("fuji1.hsc"))) -hsmc22 = Estimate(seq22, "HIDDEN_SEMI-MARKOV", hsmc0) - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("gala1.hsc"))) -hsmc23 = Estimate(seq23, "HIDDEN_SEMI-MARKOV", hsmc0) - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("granny1.hsc"))) -hsmc24 = Estimate(seq24, "HIDDEN_SEMI-MARKOV", hsmc0) - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("reinet1.hsc"))) -hsmc25 = Estimate(seq25, "HIDDEN_SEMI-MARKOV", hsmc0) - -Display(hsmc25) -Plot(hsmc25, "Intensity", 1) -Plot(hsmc25, "FirstOccurrence", 1) -Plot(hsmc25, "Counting", 1) - -# state -Plot(hsmc25, "Intensity") -Plot(hsmc25, "Sojourn") -# observed -Plot(hsmc25, "Sojourn",1) - -Plot(hsmc25, 1, ViewPoint="StateProfile") -Plot(hsmc25, 1, ViewPoint="StateProfile", Output='InState') -Plot(hsmc25, 1, ViewPoint="StateProfile", Output='OutState') - -seq25_1 = ExtractData(hsmc25) -Display(seq25_1, ViewPoint="Data", Format="Line") - -hsmc0 = HiddenSemiMarkov(str(get_shared_data("wij1.hsc"))) -hsmc26 = Estimate(seq26, "HIDDEN_SEMI-MARKOV", hsmc0) - -# model comparison - -#Thresholding(hsmc20, MinProbability=0.001) -#Thresholding(hsmc21, MinProbability=0.001) -#Thresholding(hsmc22, MinProbability=0.001) -#Thresholding(hsmc23, MinProbability=0.001) 
-#Thresholding(hsmc24, MinProbability=0.001) -#Thresholding(hsmc25, MinProbability=0.001) -#Thresholding(hsmc26, MinProbability=0.001) - - -#matrix20 = Compare(Thresholding(hsmc22, MinProbability=0.001), seq22, 10000) - -#matrix20 = Compare(Thresholding(hsmc20, MinProbability=0.001), seq20, Thresholding(hsmc21, MinProbability=0.001), seq21, Thresholding(hsmc22, MinProbability=0.001), seq22, Thresholding(hsmc24, MinProbability=0.001), seq24, Thresholding(hsmc25, MinProbability=0.001), seq25, Thresholding(hsmc26, MinProbability=0.001), seq26, 10000) - -#TODO unstable the line above works, the line below does not -#matrix20 = Compare(Thresholding(hsmc20, MinProbability=0.001), seq20, Thresholding(hsmc21, MinProbability=0.001), seq21, Thresholding(hsmc22, MinProbability=0.001), seq22, Thresholding(hsmc23, MinProbability=0.001), seq23, Thresholding(hsmc24, MinProbability=0.001), seq24, Thresholding(hsmc25, MinProbability=0.001), seq25, Thresholding(hsmc26, MinProbability=0.001), seq26, 10000, FileName="ASCII/cultivar1_models.txt") - -# may be slow -#matrix21 = Compare(Thresholding(hsmc20, MinProbability=0.001), Thresholding(hsmc21, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc24, MinProbability=0.001), Thresholding(hsmc25, MinProbability=0.001), Thresholding(hsmc26, MinProbability=0.001), 100, 90) -#matrix21 = Compare(Thresholding(hsmc20, MinProbability=0.001), Thresholding(hsmc21, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc24, MinProbability=0.001), Thresholding(hsmc25, MinProbability=0.001), Thresholding(hsmc26, MinProbability=0.001), 100, 90, FileName="ASCII/cultivar1_models_90.txt") - - -#Plot(matrix20) diff --git a/test/test_add_absorbing_run.py b/test/test_add_absorbing_run.py index 2f1754f..a80c1c3 100644 --- a/test/test_add_absorbing_run.py +++ b/test/test_add_absorbing_run.py @@ -1,93 +1,136 @@ 
"""tests on the method AddAbsorbingRun -.. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr +.. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr -.. todo:: markov case ? +.. todo:: markov case ? """ + __revision__ = "$Id$" +import pytest + +from dataclasses import dataclass +from typing import Any + from openalea.sequence_analysis.sequences import Sequences from openalea.sequence_analysis.semi_markov import SemiMarkov from openalea.sequence_analysis.data_transform import AddAbsorbingRun -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import get_shared_data + +MAX_RUN_LENGTH = 20 # hardcoded values in CPP code + + +@dataclass +class AbsorbingData: + data: Any + max_length: float + max_run_length: float + + +@pytest.fixture(params=["raw", "sequences", "semimarkov"]) +def AddAbsorbingRunData(request): + if request.param == "raw": + return AbsorbingData(None, -1, 20) + elif request.param == "sequences": + return Sequences(str(get_shared_data("sequences1.seq"))) + elif request.param == "semimarkov": + markov = SemiMarkov(str(get_shared_data("test_semi_markov.dat"))) + return markov.simulation_nb_elements(1, 1000, True) -class _AddAbsorbingRun(): + +class TestAddAbsorbingRun: """ - a main class to test the AddAbsorbingrun function on different type of + a main class to test the AddAbsorbingrun function on different type of data structure. 
- + """ - def __init__(self): - self.data = None - self.max_length = -1 - self.MAX_RUN_LENGTH = 20 # hardcoded values in CPP code - - def test_max_length(self): - seq = self.data + def test_max_length(self, AddAbsorbingRunData): + seq = AddAbsorbingRunData.data assert seq.max_length == self.max_length - - - def test_boost_versus_module(self): - seq = self.data + + def test_boost_versus_module(self, AddAbsorbingRunData): + seq = AddAbsorbingRunData.data sequence_length = -1 run_length = 6 - + boost = seq.add_absorbing_run(sequence_length, run_length) - module1 = AddAbsorbingRun(seq, - SequenceLength=sequence_length, - RunLength=run_length) + module1 = AddAbsorbingRun( + seq, SequenceLength=sequence_length, RunLength=run_length + ) module2 = AddAbsorbingRun(seq, RunLength=run_length) - assert str(module1)==str(boost) - assert str(module2)==str(boost) - - def test_no_arguments(self): - seq = self.data + assert str(module1) == str(boost) + assert str(module2) == str(boost) + + def test_no_arguments(self, AddAbsorbingRunData): + seq = AddAbsorbingRunData.data assert AddAbsorbingRun(seq) - - def test_wrong_run_length(self): - seq = self.data + + def test_wrong_run_length(self, AddAbsorbingRunData): + seq = AddAbsorbingRunData.data try: - #second arguments must be less than MAX_RU_LENGTH - _res = AddAbsorbingRun(seq, -1, self.MAX_RUN_LENGTH + 1) + # second arguments must be less than MAX_RU_LENGTH + _res = AddAbsorbingRun(seq, -1, AddAbsorbingRunData["MAX_RUN_LENGTH"] + 1) assert False except Exception: assert True - def test_wrong_sequence_length(self): - seq = self.data + def test_wrong_sequence_length(self, AddAbsorbingRunData): + seq = AddAbsorbingRunData.data try: - #second arguments must be less than MAX_RU_LENGTH - _res = AddAbsorbingRun(seq, self.max_length -1, -1) + # second arguments must be less than MAX_RU_LENGTH + _res = AddAbsorbingRun(seq, AddAbsorbingRunData["max_length"] - 1, -1) assert False except Exception: assert True -class 
Test_AddAbsorbingRun_Sequences(_AddAbsorbingRun): - """sequences case""" - def __init__(self): - _AddAbsorbingRun.__init__(self) - self.data = self.create_data() - self.max_length = 30 - - def create_data(self): - seq = Sequences(str(get_shared_data('sequences1.seq'))) - return seq - -class Test_AddAbsorbingRun_SemiMarkov(_AddAbsorbingRun): - """semi markov case""" - def __init__(self): - _AddAbsorbingRun.__init__(self) - self.data = self.create_data() - self.max_length = 1000 - - def create_data(self): - markov = SemiMarkov(str(get_shared_data('test_semi_markov.dat'))) - semi_markov_data = markov.simulation_nb_elements(1, 1000, True) - return semi_markov_data - -if __name__ == "__main__": - runTestClass(Test_AddAbsorbingRun_Sequences()) - runTestClass(Test_AddAbsorbingRun_SemiMarkov()) - \ No newline at end of file + +@pytest.fixture +def seq(): + return Sequences(str(get_shared_data("sequences1.seq"))) + + +@pytest.fixture +def semi_markov(): + markov = SemiMarkov(str(get_shared_data("test_semi_markov.dat"))) + return markov.simulation_nb_elements(1, 1000, True) + + +def test_max_length_seq(seq): + assert seq.max_length == 30 + + +def test_boost_versus_module_seq(seq): + sequence_length = -1 + run_length = 6 + + boost = seq.add_absorbing_run(sequence_length, run_length) + module1 = AddAbsorbingRun(seq, SequenceLength=sequence_length, RunLength=run_length) + module2 = AddAbsorbingRun(seq, RunLength=run_length) + + assert str(module1) == str(boost) + assert str(module2) == str(boost) + + +def test_no_arguments_seq(seq): + assert AddAbsorbingRun(seq) + + +def test_wrong_run_length_seq(seq): + try: + # second arguments must be less than MAX_RU_LENGTH + _res = AddAbsorbingRun(seq, -1, MAX_RUN_LENGTH + 1) + assert False + except Exception: + assert True + + +def test_wrong_sequence_length_seq(seq): + try: + # second arguments must be less than MAX_RU_LENGTH + max_length = 30 + _res = AddAbsorbingRun(seq, max_length - 1, -1) + assert False + except Exception: + assert 
True diff --git a/test/test_build_auxialiary_variable.py b/test/test_build_auxialiary_variable.py index 03d610b..e6cd66a 100644 --- a/test/test_build_auxialiary_variable.py +++ b/test/test_build_auxialiary_variable.py @@ -1,5 +1,5 @@ from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data def test1(): seq6 = Sequences(str(get_shared_data("pin_laricio_6.seq"))) @@ -18,4 +18,4 @@ def test1(): Plot(SelectIndividual(seq31, [95]), ViewPoint="Data") if __name__ == "__main__": - test1() \ No newline at end of file + test1() diff --git a/test/test_cluster.py b/test/test_cluster.py index a75eee5..e3eabb9 100644 --- a/test/test_cluster.py +++ b/test/test_cluster.py @@ -1,198 +1,101 @@ -""" Cluster tests +"""Cluster tests .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr .. todo:: check the AddVariable option (sequences) and sequences cases """ + __revision__ = "$Id$" import os + +import pytest from openalea.sequence_analysis.sequences import Sequences -#from openalea.sequence_analysis.semi_markov import SemiMarkov + +# from openalea.sequence_analysis.semi_markov import SemiMarkov from openalea.stat_tool.cluster import Cluster from openalea.stat_tool.histogram import Histogram from openalea.stat_tool.convolution import Convolution from openalea.stat_tool.compound import Compound from openalea.stat_tool.vectors import Vectors -from tools import runTestClass, robust_path as get_shared_data - -class _Cluster(): - """Test class to test cluster function and classes - - create_data, cluster_step and cluster_limit funciton will be required - """ - - def __init__(self): - self.data = None - - def create_data(self): - raise NotImplemented - - def test_cluster_step(self): - raise NotImplemented - - def test_cluster_limit(self): - raise NotImplemented - -class _HistoCase(_Cluster): - """ - inherits from _cluster and implements the cluster_limit and cluster_step - 
functions. - - In addition, classes that inherits from _HistoCase must implement - cluster_information - """ - def __init__(self): - _Cluster.__init__(self) - self.data = None - - def test_cluster_step(self): - cluster1 = Cluster(self.data, "Step", 2) - cluster2 = self.data.cluster_step(2) - assert str(cluster1) == str(cluster2) - - def test_cluster_limit(self): - cluster1 = Cluster(self.data, "Limit", [2, 4, 6, 8, 10]) - cluster2 = self.data.cluster_limit([2, 4, 6, 8, 10]) - assert str(cluster1) == str(cluster2) - - def test_cluster_information(self): - cluster1 = Cluster(self.data, "Information", 0.8) - cluster2 = self.data.cluster_information(0.8) - assert str(cluster1) == str(cluster2) - - -class TestHistogram(_HistoCase): - - def __init__(self): - _HistoCase.__init__(self) - self.data = self.create_data() - - def create_data(self): - return Histogram(str(get_shared_data( 'fagus1.his'))) - - -class TestConvolution( _HistoCase): - - def __init__(self): - _HistoCase.__init__(self) - self.data = self.create_data() - - def create_data(self): - conv = Convolution(str(get_shared_data('test_convolution1.conv'))) - return conv.simulate(1000) - - -class TestCompound(_HistoCase): - - def __init__(self): - _HistoCase.__init__(self) - self.data = self.create_data() +from .tools import runTestClass, robust_path as get_shared_data - def create_data(self): - comp = Compound(str(get_shared_data('test_compound1.cd'))) - return comp.simulate(1000) +@pytest.fixture +def create_data_vectorsn(): + return Vectors([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) -class TestVectorsn(_Cluster): - def __init__(self): - _Cluster.__init__(self) - self.data = self.create_data() +def test_cluster_step_vectorsn(create_data_vectorsn): + data = create_data_vectorsn + cluster1 = data.cluster_step(1, 2) + cluster2 = Cluster(data, "Step", 1, 2) + assert str(cluster1) == str(cluster2) - def create_data(self): - v = Vectors([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) - return v - def test_cluster_step(self): - data = 
self.data - cluster1 = data.cluster_step(1, 2) - cluster2 = Cluster(data, "Step", 1, 2) - assert str(cluster1) == str(cluster2) +def test_cluster_limit_vectorsn(create_data_vectorsn): + data = create_data_vectorsn + cluster1 = data.cluster_limit(1, [2, 4, 6]) + cluster2 = Cluster(data, "Limit", 1, [2, 4, 6]) + assert str(cluster1) == str(cluster2) - def test_cluster_limit(self): - data = self.data - cluster1 = data.cluster_limit(1, [2, 4, 6]) - cluster2 = Cluster(data, "Limit", 1, [2, 4, 6]) - assert str(cluster1) == str(cluster2) -class TestVectors1(_Cluster): +@pytest.fixture +def create_data_vector1(): + return Vectors([[1], [1], [4]]) - def __init__(self): - _Cluster.__init__(self) - self.data = self.create_data() - def create_data(self): - v = Vectors([[1], [1], [4]]) - return v +def test_cluster_step_vector1(create_data_vector1): + data = create_data_vector1 + cluster1 = data.cluster_step(1, 2) + cluster2 = Cluster(data, "Step", 2) + assert str(cluster1) == str(cluster2) - def test_cluster_step(self): - data = self.data - cluster1 = data.cluster_step(1, 2) - cluster2 = Cluster(data, "Step", 2) - assert str(cluster1) == str(cluster2) - def test_cluster_limit(self): - data = self.data - cluster1 = data.cluster_limit(1, [2, 4, 6]) - cluster2 = Cluster(data, "Limit", [2, 4, 6]) - assert str(cluster1) == str(cluster2) +def test_cluster_limit_vector1(create_data_vector1): + data = create_data_vector1 + cluster1 = data.cluster_limit(1, [2, 4, 6]) + cluster2 = Cluster(data, "Limit", [2, 4, 6]) + assert str(cluster1) == str(cluster2) -class TestSequences1(_Cluster): +@pytest.fixture +def create_data_sequences1(): + return Sequences(str(get_shared_data("sequences1.seq"))) - def __init__(self): - _Cluster.__init__(self) - self.data = self.create_data() - def create_data(self): - data = Sequences(str(get_shared_data('sequences1.seq'))) - return data +def test_cluster_step_sequences1(create_data_sequences1): + data = create_data_sequences1 + mode = False + cluster1 = 
data.cluster_step(1, 2, mode) + cluster2 = Cluster(data, "Step", 2) + assert str(cluster1) == str(cluster2) - def test_cluster_step(self): - data = self.data - mode = False - cluster1 = data.cluster_step(1, 2, mode) - cluster2 = Cluster(data, "Step", 2) - assert str(cluster1) == str(cluster2) - def test_cluster_limit(self): - data = self.data - print(data.nb_variable) - cluster1 = data.cluster_limit(1,[2], False) - cluster2 = Cluster(data,"Limit", [2] , AddVariable=False) - assert str(cluster1) == str(cluster2) +def test_cluster_limit_sequences1(create_data_sequences1): + data = create_data_sequences1 + print(data.nb_variable) + cluster1 = data.cluster_limit(1, [2], False) + cluster2 = Cluster(data, "Limit", [2], AddVariable=False) + assert str(cluster1) == str(cluster2) -class TestSequencesn(_Cluster): +@pytest.fixture +def create_data_sequencen(): + return Sequences(str(get_shared_data("sequences2.seq"))) - def __init__(self): - _Cluster.__init__(self) - self.data = self.create_data() - def create_data(self): - data = Sequences(str(get_shared_data('sequences2.seq'))) - return data +def test_cluster_step_sequencen(create_data_sequencen): + data = create_data_sequencen + mode = True + cluster1 = data.cluster_step(1, 2, mode) + cluster2 = Cluster(data, "Step", 1, 2) + assert str(cluster1) == str(cluster2) - def test_cluster_step(self): - data = self.data - mode = True - cluster1 = data.cluster_step(1, 2, mode) - cluster2 = Cluster(data, "Step", 1, 2) - assert str(cluster1) == str(cluster2) - def test_cluster_limit(self): - data = self.data - cluster1 = data.cluster_limit(1, [2 ], True) - cluster2 = Cluster(data, "Limit", 1, [2]) - assert str(cluster1) == str(cluster2) - -if __name__ == "__main__": - runTestClass(TestVectors1()) - runTestClass(TestVectorsn()) - runTestClass(TestSequences1()) - runTestClass(TestSequencesn()) - runTestClass(TestConvolution()) - runTestClass(TestCompound()) - runTestClass(TestHistogram()) +def 
test_cluster_limit_sequencen(create_data_sequencen): + data = create_data_sequencen + cluster1 = data.cluster_limit(1, [2], True) + cluster2 = Cluster(data, "Limit", 1, [2]) + assert str(cluster1) == str(cluster2) diff --git a/test/test_compare.py b/test/test_compare.py index 6b29593..c7be12a 100644 --- a/test/test_compare.py +++ b/test/test_compare.py @@ -4,8 +4,11 @@ .. todo:: systematic tests """ + __revision__ = "$Id$" +import pytest + from openalea.sequence_analysis.sequences import Sequences from openalea.sequence_analysis.hidden_semi_markov import HiddenSemiMarkov from openalea.sequence_analysis.compare import Compare @@ -15,13 +18,15 @@ from openalea.sequence_analysis.estimate import Estimate from openalea.sequence_analysis.data_transform import Thresholding -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data -class _Compare(): + +class _Compare: """ a main class to test the Compare function on different type of data structure. 
""" + def __init__(self): self.data = None @@ -32,105 +37,105 @@ def test_compare(self): raise NotImplemented +@pytest.fixture +def create_data_compare_histogram(): + seq0 = Sequences(str(get_shared_data("chene_sessile_15pa.seq"))) + vec10 = Vectors(seq0) + vec95 = ValueSelect(vec10, 1, 95) + vec96 = ValueSelect(vec10, 1, 96) + vec97 = ValueSelect(vec10, 1, 97) + return [vec95, vec96, vec97] -class Test_Compare_Histograms(_Compare): - def __init__(self): - _Compare.__init__(self) - self.data = self.create_data() - def create_data(self): - seq0 = Sequences(str(get_shared_data( "chene_sessile_15pa.seq"))) - vec10 = Vectors(seq0) - vec95 = ValueSelect(vec10, 1, 95) - vec96 = ValueSelect(vec10, 1, 96) - vec97 = ValueSelect(vec10, 1, 97) - return([vec95, vec96, vec97]) +def test_compare_compare_histogram(create_data_compare_histogram): + seq = create_data_compare_histogram + res = Compare( + ExtractHistogram(seq[0], 2), + ExtractHistogram(seq[1], 2), + ExtractHistogram(seq[2], 2), + "N", + ) + assert res - def test_compare(self): - seq = self.data - res = Compare(ExtractHistogram(seq[0], 2), - ExtractHistogram(seq[1], 2), - ExtractHistogram(seq[2], 2), "N") - assert res +@pytest.fixture +def create_data_sequences(): + return Sequences(str(get_shared_data("dupreziana_a1.seq"))) -class Test_Compare_Sequences(_Compare): - def __init__(self): - _Compare.__init__(self) - self.data = self.create_data() +def test_compare_sequences(create_data_sequences): + seq = create_data_sequences + matrix20 = Compare(seq) + assert matrix20 - def create_data(self): - seq = Sequences(str(get_shared_data( 'dupreziana_a1.seq'))) - return(seq) - def test_compare(self): - seq = self.data - matrix20 = Compare(seq) - assert matrix20 +@pytest.fixture +def create_data_vectordistance(): + return Sequences(str(get_shared_data("dupreziana_a1.seq"))) -class Test_Compare_Sequences_VectorDistance(_Compare): +def test_compare_vectordistance(create_data_vectordistance): + seq = create_data_vectordistance + 
matrix20 = Compare(seq, VectorDistance("N", "N")) + assert matrix20 - def __init__(self): - _Compare.__init__(self) - self.data = self.create_data() - def create_data(self): - seq = Sequences(str(get_shared_data( 'dupreziana_a1.seq'))) - return(seq) +@pytest.fixture +def create_data_vectors_vectordistance(): + return Vectors([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) - def test_compare(self): - seq = self.data - matrix20 = Compare(seq, VectorDistance("N", "N")) - assert matrix20 +def test_compare_vectors_vectordistance(create_data_vectors_vectordistance): + data = create_data_vectors_vectordistance + a = Compare(data, VectorDistance("N", "N", "N")) + assert a.nb_row == 3 + assert a.nb_column == 3 -class Test_Compare_Vectors_VectorDistance(_Compare): - def __init__(self): - _Compare.__init__(self) - self.data = self.create_data() +@pytest.fixture +def create_data_compare_hsmc_with_sequences(): + hsmc0 = HiddenSemiMarkov(str(get_shared_data("belren1.hsc"))) + hsmc1 = HiddenSemiMarkov(str(get_shared_data("elstar1.hsc"))) + seq0 = Sequences(str(get_shared_data("belren1.seq"))) + seq1 = Sequences(str(get_shared_data("elstar1.seq"))) + data0 = Estimate(seq0, "HIDDEN_SEMI-MARKOV", hsmc0) + data1 = Estimate(seq1, "HIDDEN_SEMI-MARKOV", hsmc1) + return [seq0, seq1, data0, data1] - def create_data(self): - data = Vectors([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) - return(data) - - def test_compare(self): - data = self.data - a = Compare(data, VectorDistance("N", "N", "N")) - assert a.nb_row == 3 - assert a.nb_column == 3 - +def test_compare_compare_hsmc_with_sequences(create_data_compare_hsmc_with_sequences): + data = create_data_compare_hsmc_with_sequences + matrix20 = Compare( + Thresholding(data[2], MinProbability=0.001), + data[0], + Thresholding(data[3], MinProbability=0.001), + data[1], + 10000, + ) + assert matrix20 class Test_Compare_hsmc_with_sequences(_Compare): - def __init__(self): _Compare.__init__(self) self.data = self.create_data() def create_data(self): - hsmc0 = 
HiddenSemiMarkov(str(get_shared_data( "belren1.hsc"))) + hsmc0 = HiddenSemiMarkov(str(get_shared_data("belren1.hsc"))) hsmc1 = HiddenSemiMarkov(str(get_shared_data("elstar1.hsc"))) - seq0 = Sequences(str(get_shared_data( "belren1.seq"))) - seq1 = Sequences(str(get_shared_data( "elstar1.seq"))) + seq0 = Sequences(str(get_shared_data("belren1.seq"))) + seq1 = Sequences(str(get_shared_data("elstar1.seq"))) data0 = Estimate(seq0, "HIDDEN_SEMI-MARKOV", hsmc0) data1 = Estimate(seq1, "HIDDEN_SEMI-MARKOV", hsmc1) - return([seq0, seq1, data0, data1]) + return [seq0, seq1, data0, data1] def test_compare(self): data = self.data - matrix20 = Compare(Thresholding(data[2], MinProbability=0.001), data[0], - Thresholding(data[3], MinProbability=0.001), data[1], - 10000) + matrix20 = Compare( + Thresholding(data[2], MinProbability=0.001), + data[0], + Thresholding(data[3], MinProbability=0.001), + data[1], + 10000, + ) assert matrix20 - - -if __name__ == "__main__": - runTestClass(Test_Compare_Histograms()) - runTestClass(Test_Compare_Sequences()) - runTestClass(Test_Compare_Sequences_VectorDistance()) - runTestClass(Test_Compare_Vectors_VectorDistance()) - runTestClass(Test_Compare_hsmc_with_sequences()) diff --git a/test/test_compute_self_transition.py b/test/test_compute_self_transition.py index 9a28816..7be8406 100644 --- a/test/test_compute_self_transition.py +++ b/test/test_compute_self_transition.py @@ -2,12 +2,10 @@ .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ -__revision__ = "$Id$" from openalea.sequence_analysis.data_transform import ComputeSelfTransition -from openalea.sequence_analysis import Sequences -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import Sequences, get_shared_data seq1 = Sequences(str(get_shared_data("sequences1.seq"))) seqn = Sequences(str(get_shared_data("sequences2.seq"))) @@ -25,6 +23,3 @@ def test_ComputeSelfTransition_order(): the order arguments is protected...""" ComputeSelfTransition(seqn, Order=2) -if __name__ == "__main__": - test_ComputeSelfTransition() - test_ComputeSelfTransition_order() diff --git a/test/test_correlation.py b/test/test_correlation.py index 12c96a8..053e8a7 100644 --- a/test/test_correlation.py +++ b/test/test_correlation.py @@ -1,9 +1,10 @@ -""" Tests on ComputeAutoCorrelation, ComputeParialAutoCorrelation, +"""Tests on ComputeAutoCorrelation, ComputeParialAutoCorrelation, ComputewhiteNoiseCorrelation .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ + __revision__ = "$Id$" @@ -15,45 +16,47 @@ from openalea.sequence_analysis.correlation import ComputePartialAutoCorrelation from openalea.stat_tool.distribution import Distribution -from tools import runTestClass -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass +from .tools import runTestClass, robust_path as get_shared_data -class Data(): +class Data: def __init__(self): - self.sequence = self.create_sequence_data() self.type_map = type_dict def create_sequence_data(self): - - seq66 = Sequences(str(get_shared_data( "laricio_date66.seq"))) - seq69 = MovingAverage(VariableScaling(seq66, 3, 100), - Distribution("B", 0, 6, 0.5), BeginEnd=True, - Output="Residual") + seq66 = Sequences(str(get_shared_data("laricio_date66.seq"))) + seq69 = MovingAverage( + VariableScaling(seq66, 3, 100), + Distribution("B", 0, 6, 0.5), + BeginEnd=True, + Output="Residual", + ) return seq69 + def CorrelationData(index=1): """Returns a correlation index from 1 to 3""" - seq66 = Sequences(str(get_shared_data( "laricio_date66.seq"))) + seq66 = Sequences(str(get_shared_data("laricio_date66.seq"))) ret = ComputeCorrelation(seq66, index) return ret - - class TestComputeCorrelation(Data): - def __init__(self): Data.__init__(self) self.variable = 2 - def compute_correlation_type(self, variable, type, MaxLag=10, - Normalization="Exact"): + + def compute_correlation_type( + self, variable, type, MaxLag=10, Normalization="Exact" + ): seq = self.sequence - cf = ComputeCorrelation(seq, variable, - Type=type, MaxLag=MaxLag, Normalization=Normalization) + cf = ComputeCorrelation( + seq, variable, Type=type, MaxLag=MaxLag, Normalization=Normalization + ) assert cf.type == self.type_map[type] return cf @@ -62,9 +65,9 @@ def test_correlation_no_optional_arguments(self): cf = ComputeCorrelation(seq, self.variable) def test_spearman(self): - seq = Sequences(str(get_shared_data( "laricio_date66.seq"))) + 
seq = Sequences(str(get_shared_data("laricio_date66.seq"))) ComputeCorrelation(seq, 1, Type="Spearman") - ComputeCorrelation(seq, 1, 2,Type="Spearman") + ComputeCorrelation(seq, 1, 2, Type="Spearman") try: dummy = 3 ComputeCorrelation(seq, 1, 2, dummy, Type="Spearman") @@ -89,9 +92,7 @@ def test_norm3(self): assert True - class TestComputeWhiteNoiseCorrelation(TestComputeCorrelation): - def __init__(self): TestComputeCorrelation.__init__(self) self.correlation = self.test_pearson() @@ -106,10 +107,10 @@ def test_order(self): def test_distribution(self): data = self.correlation - ComputeWhiteNoiseCorrelation(data , Distribution("BINOMIAL", 0,4,0.5)) + ComputeWhiteNoiseCorrelation(data, Distribution("BINOMIAL", 0, 4, 0.5)) -class TestComputePartialAutoCorrelation(Data): +class TestComputePartialAutoCorrelation(Data): def __init__(self): Data.__init__(self) diff --git a/test/test_cumulate.py b/test/test_cumulate.py index fe6ce8d..c86f18b 100644 --- a/test/test_cumulate.py +++ b/test/test_cumulate.py @@ -1,45 +1,36 @@ -""" Cumulate tests +"""Cumulate tests .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr .. todo : in general, variable index starts at 1 when calling Cumulate! -Do we want to start at 0 ? Since later on, python calls will +Do we want to start at 0 ? Since later on, python calls will start the index at 0 ? 
- + """ + __revision__ = "$Id$" from openalea.sequence_analysis.data_transform import Cumulate -from tools import runTestClass, robust_path as get_shared_data +from .tools import robust_path as get_shared_data from openalea.sequence_analysis import Sequences seqn = Sequences(str(get_shared_data("sequences2.seq"))) seq1 = Sequences(str(get_shared_data("sequences1.seq"))) - - - def test_cumulate1(): data = seq1 a = Cumulate(data) b = data.cumulate(1).markovian_sequences() - + assert str(a) == str(b) assert a.get_max_value(0) == 29 assert b.get_max_value(0) == 29 - + assert data.get_max_value(0) == 2 def test_cumulaten(): - for var in range(1, seqn.nb_variable+1): + for var in range(1, seqn.nb_variable + 1): assert str(seqn.cumulate(var).markovian_sequences()) == str(Cumulate(seqn, var)) - - -if __name__ == "__main__": - test_cumulate1() - test_cumulaten() - - diff --git a/test/test_data.py b/test/test_data.py index db4ec70..85a7020 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -1,10 +1,13 @@ -from tools import runTestClass, robust_path as get_shared_data +from .tools import robust_path as get_shared_data def test_get_shared_data(): from openalea.sequence_analysis.sequences import Sequences - seq = Sequences(str(get_shared_data('wij1.seq'))) - assert(seq) -if __name__ == "__main__": - test_get_shared_data() \ No newline at end of file + seq = Sequences(str(get_shared_data("wij1.seq"))) + assert seq + + +##if __name__ == "__main__": +## test_get_shared_data() + diff --git a/test/test_data_transform.py b/test/test_data_transform.py index a2463a0..5d3e736 100644 --- a/test/test_data_transform.py +++ b/test/test_data_transform.py @@ -3,91 +3,107 @@ .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ -__revision__ = "$Id$" - - -from openalea.sequence_analysis import get_shared_data -from openalea.sequence_analysis.data_transform import * -from openalea.sequence_analysis.sequences import Sequences -from openalea.stat_tool import Mixture -from openalea.stat_tool import Distribution -from openalea.stat_tool import Simulate -from openalea.stat_tool import Merge -from openalea.stat_tool import SelectStep -from openalea.stat_tool import Plot -from openalea.stat_tool import Display -from openalea.stat_tool import Vectors -from tools import runTestClass, robust_path as get_shared_data - -class TestRemoveRun(): - def __init__(self): - self.data = Sequences(str(get_shared_data("sequences1.seq"))) - - def _test_sequences_1(self): - - seq1 = self.data - seq2 = seq1.remove_run(1, 0,"e",2) - seq3 = RemoveRun(seq1,1, 0,"e", MaxLength=2) - assert str(seq3)==str(seq2) +__revision__ = "$Id$" - def test_incorrect_value(self): - seq1 = self.data +import pytest + +from openalea.sequence_analysis import ( + ComputeStateSequences, + RemoveRun, + Sequences, + TransitionCount, +) +from openalea.stat_tool import ( + Display, + Distribution, + Merge, + Mixture, + Plot, + SelectStep, + Simulate, + Vectors, +) +from .tools import robust_path as get_shared_data + + +@pytest.fixture +def create_data_sequence(): + return Sequences(str(get_shared_data("sequences1.seq"))) + + +@pytest.fixture +def create_data_sequence2(): + return Sequences(str(get_shared_data("sequences2.seq"))) + + +class TestRemoveRun: + def test_sequences_1(self, create_data_sequence): + seq1 = create_data_sequence + seq2 = seq1.remove_run(1, 0, "e", 2) + seq3 = RemoveRun(seq1, 1, 0, "e", MaxLength=2) + assert str(seq3) == str(seq2) + + def test_incorrect_value(self, create_data_sequence): + seq1 = create_data_sequence try: - seq1.remove_run(1,3,'e',10) + seq1.remove_run(1, 3, "e", 10) assert False except: assert True try: - seq1.remove_run(1,-1,'e',10) + 
seq1.remove_run(1, -1, "e", 10) assert False except: assert True - def test_incorrect_variable(self): - seq1 = self.data + def test_incorrect_variable(self, create_data_sequence): + seq1 = create_data_sequence try: - seq1.remove_run(0,2,'e',10) + seq1.remove_run(0, 2, "e", 10) assert False except: assert True - def _test_sequences_2(self): - seq1 = Sequences(str(get_shared_data("sequences2.seq"))) - seq2 = seq1.remove_run(1, 0,"e",2) - seq3 = RemoveRun(seq1,1, 0,"e",2) - assert str(seq3)==str(seq2) + def test_sequences_2(self, create_data_sequence2): + seq1 = create_data_sequence2 + seq2 = seq1.remove_run(1, 0, "e", 2) + seq3 = RemoveRun(seq1, 1, 0, "e", 2) + assert str(seq3) == str(seq2) def test_markov_data(): """not implemented""" pass + def test_semi_markov_data(): """not implemented""" pass + def test_discrete_sequences(): """not implemented""" pass + def test_compute_state_sequence(): from openalea.sequence_analysis import HiddenSemiMarkov - seq = Sequences(str(get_shared_data( "wij1.seq"))) - hsmc0 = HiddenSemiMarkov(str(get_shared_data( "wij1.hsc"))) + + seq = Sequences(str(get_shared_data("wij1.seq"))) + hsmc0 = HiddenSemiMarkov(str(get_shared_data("wij1.hsc"))) ComputeStateSequences(seq, hsmc0, Algorithm="ForwardBackward", Characteristics=True) def test_transition_count(): - seq = Sequences(str(get_shared_data( "wij1.seq"))) - TransitionCount(seq, 5, Begin=True, Estimator="MaximumLikelihood", - Filename = "ASCII") + seq = Sequences(str(get_shared_data("wij1.seq"))) + TransitionCount(seq, 5, Begin=True, Estimator="MaximumLikelihood", Filename="ASCII") def test_merge(): - - mixt1 = Mixture(0.6, Distribution("B", 2, 18, 0.5), - 0.4, Distribution("NB", 10, 10, 0.5)) + mixt1 = Mixture( + 0.6, Distribution("B", 2, 18, 0.5), 0.4, Distribution("NB", 10, 10, 0.5) + ) mixt_histo1 = Simulate(mixt1, 200) @@ -103,22 +119,22 @@ def test_select_step(): """ ######################################################################### # - # Well-log data; used in Fearnhead 
and Clifford "On-line Inference for + # Well-log data; used in Fearnhead and Clifford "On-line Inference for # Hidden Markov Models via Particle Filters". Measurements of Nuclear-response - # of a well-bore over time. Data from O Ruanaidh, J. J. K. and - # Fitzgerald, W. J. (1996). "Numerical Bayesion Methods Applied to Signal + # of a well-bore over time. Data from O Ruanaidh, J. J. K. and + # Fitzgerald, W. J. (1996). "Numerical Bayesion Methods Applied to Signal # Processing". New York: Springer. # ######################################################################### """ - seq1 = Sequences(str(get_shared_data( "well_log_filtered.seq"))) + seq1 = Sequences(str(get_shared_data("well_log_filtered.seq"))) Plot(seq1, ViewPoint="Data") Plot(seq1) SelectStep(seq1, 1000) Plot(seq1) - #Display(seq1, 1, 17, "Gaussian", ViewPoint="SegmentProfile", NbSegmentation=5) + # Display(seq1, 1, 17, "Gaussian", ViewPoint="SegmentProfile", NbSegmentation=5) Plot(seq1, 1, 17, "Gaussian", ViewPoint="SegmentProfile") # seq20 = Segmentation(seq1, 1, 20, "Gaussian") @@ -134,8 +150,3 @@ def test_select_step(): SelectStep(vec1, 1000) Plot(vec1) - -if __name__ == "__main__": - runTestClass(test_merge()) - runTestClass(TestRemoveRun()) - test_compute_state_sequence() diff --git a/test/test_dataflow_stat.py b/test/test_dataflow_stat.py index a9d5150..3b39584 100644 --- a/test/test_dataflow_stat.py +++ b/test/test_dataflow_stat.py @@ -1,52 +1,50 @@ -from openalea.core.alea import * -# !!important!! import dataflowview, which defines the fields of each nodes -#from openalea.grapheditor import dataflowview +try: + from openalea.core.alea import * + # !!important!! 
import dataflowview, which defines the fields of each nodes + #from openalea.grapheditor import dataflowview + core = True +except ImportError: + core = False + +if core: + pm = PackageManager() + pm.init(verbose=True) + + # These tests use gnuplot interface, which requires human interaction + # Consequently, they cannot be used within builbot (which hangs forever) + # We added a flags inside aml/src/aml/wralea/py_stat.py to prevent gnuplot + # to be launched if these tests are run with nosetests. The remaining of the + # nodes are run. + # In order to have the gnuplot interface, run this script with python instead of nosetests + + def test_demo_corsican(): + """ Test changepoint demo corsican """ + res = run(('demo.changepoint_stat_tool','Corsican pine change point'),{},pm=pm) + assert res == [] + + def test_demo_dycorinia(): + """ Test dataflow demo dycorinia """ + res = run(('demo.changepoint_stat_tool','Dycorinia change point'),{},pm=pm) + assert res == [] + + def test_oak_demo(): + """ Test dataflow demo oak""" + res = run(('demo.changepoint_stat_tool', 'oak_demo'),{},pm=pm) + assert res == [] + + def test_beech1_demo(): + """ Test dataflow demo beech""" + res = run(('demo.changepoint_stat_tool', 'beech1'),{},pm=pm) + assert res == [] + + def test_stat_tool_demos_and_tutorial_convolution(): + """ Test dataflow demo compound tutorial""" + res = run(('demo.stat_tool demos and tutorials', 'convolution_tutorial'),{},pm=pm) + assert res == [] + + + def test_stat_tool_demos_and_tutorial_compound(): + res = run(('demo.stat_tool demos and tutorials', 'compound_tutorial'),{},pm=pm) + assert res == [] -pm = PackageManager() -pm.init(verbose=True) - -# These tests use gnuplot interface, which requires human interaction -# Consequently, they cannot be used within builbot (which hangs forever) -# We added a flags inside aml/src/aml/wralea/py_stat.py to prevent gnuplot -# to be launched if these tests are run with nosetests. The remaining of the -# nodes are run. 
-# In order to have the gnuplot interface, run this script with python instead of nosetests - -def test_demo_corsican(): - """ Test changepoint demo corsican """ - res = run(('demo.changepoint_stat_tool','Corsican pine change point'),{},pm=pm) - assert res == [] - -def test_demo_dycorinia(): - """ Test dataflow demo dycorinia """ - res = run(('demo.changepoint_stat_tool','Dycorinia change point'),{},pm=pm) - assert res == [] - -def test_oak_demo(): - """ Test dataflow demo oak""" - res = run(('demo.changepoint_stat_tool', 'oak_demo'),{},pm=pm) - assert res == [] - -def test_beech1_demo(): - """ Test dataflow demo beech""" - res = run(('demo.changepoint_stat_tool', 'beech1'),{},pm=pm) - assert res == [] - -def test_stat_tool_demos_and_tutorial_convolution(): - """ Test dataflow demo compound tutorial""" - res = run(('demo.stat_tool demos and tutorials', 'convolution_tutorial'),{},pm=pm) - assert res == [] - - -def test_stat_tool_demos_and_tutorial_compound(): - res = run(('demo.stat_tool demos and tutorials', 'compound_tutorial'),{},pm=pm) - assert res == [] - - - -if __name__ == "__main__": - test_demo_corsican() - test_demo_dycorinia() - test_stat_tool_tutorial_compound() - test_stat_tool_tutorial_convolution() diff --git a/test/test_difference.py b/test/test_difference.py index d251a7b..52358ba 100644 --- a/test/test_difference.py +++ b/test/test_difference.py @@ -1,43 +1,48 @@ -""" Difference tests +"""Difference tests .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr - + """ + __revision__ = "$Id$" +import pytest from openalea.sequence_analysis.data_transform import Difference from openalea.sequence_analysis import Sequences -from tools import runTestClass, robust_path as get_shared_data +from .tools import robust_path as get_shared_data + + +@pytest.fixture +def build_seq1(): + return Sequences(str(get_shared_data("sequences1.seq"))) -seq1 = Sequences(str(get_shared_data("sequences1.seq"))) -seqn = Sequences(str(get_shared_data("sequences2.seq"))) +@pytest.fixture +def build_seqn(): + return Sequences(str(get_shared_data("sequences2.seq"))) -def test_difference1(): + +def test_difference1(build_seq1): """difference test to finalise""" - data = seq1 - res = Difference(data,1) - assert str(res)==str(data.difference(1, False)) + data = build_seq1 + res = Difference(data, 1) + assert str(res) == str(data.difference(1, False)) assert res.cumul_length == 50 -def test_difference1_first_element(): + +def test_difference1_first_element(build_seq1): """difference test to finalise""" - data = seq1 - res = Difference(data,1, True) - assert str(res)==str(data.difference(1, True)) + data = build_seq1 + res = Difference(data, 1, True) + assert str(res) == str(data.difference(1, True)) assert res.cumul_length == 52 -def test_differencen(): + +def test_differencen(build_seqn): """difference test to finalise""" - data = seqn - res = Difference(data,Variable=1) - assert str(res)==str(data.difference(1, False)) + data = build_seqn + res = Difference(data, Variable=1) + assert str(res) == str(data.difference(1, False)) assert res.cumul_length == 23 - -if __name__ == "__main__": - test_difference1() - test_difference1_first_element() - test_differencen() - diff --git a/test/test_estimate.py b/test/test_estimate.py index f699973..2708586 100644 --- a/test/test_estimate.py +++ b/test/test_estimate.py @@ -4,161 +4,226 @@ .. 
todo:: finalise """ + __revision__ = "$Id$" +import pytest + from openalea.stat_tool.vectors import Vectors from openalea.stat_tool.data_transform import ExtractHistogram -from tools import runTestClass -from test_tops import TopsData -from test_hidden_semi_markov import HiddenSemiMarkovData -from test_semi_markov import SemiMarkovData +from .test_tops import TopsData +from .test_hidden_semi_markov import HiddenSemiMarkovData -from openalea.sequence_analysis import * +from openalea.sequence_analysis import ( + Sequences, + Merge, + RemoveRun, + SegmentationExtract, + LengthSelect, + Estimate, +) -from tools import runTestClass, robust_path as get_shared_data +from .tools import robust_path as get_shared_data -_seq1 = Sequences(str(get_shared_data('dupreziana_20a2.seq'))) +_seq1 = Sequences(str(get_shared_data("dupreziana_20a2.seq"))) seq2 = RemoveRun(_seq1, 1, 0, "End") -seq3 = Sequences(str(get_shared_data('dupreziana_40a2.seq'))) +seq3 = Sequences(str(get_shared_data("dupreziana_40a2.seq"))) seq4_0 = RemoveRun(seq3, 2, 0, "End") seq4 = SegmentationExtract(seq4_0, 1, 2) -seq5 = Sequences(str(get_shared_data('dupreziana_60a2.seq'))) +seq5 = Sequences(str(get_shared_data("dupreziana_60a2.seq"))) seq6_0 = RemoveRun(seq5, 2, 0, "End") seq6 = LengthSelect(SegmentationExtract(seq6_0, 1, 2), 1, Mode="Reject") -seq7 = Sequences(str(get_shared_data('dupreziana_80a2.seq'))) +seq7 = Sequences(str(get_shared_data("dupreziana_80a2.seq"))) seq8_0 = RemoveRun(seq7, 2, 0, "End") seq8 = SegmentationExtract(seq8_0, 1, 2) seq10 = Merge(seq2, seq4, seq6, seq8) - - -class Test_Estimate_Histogram(): - def __init__(self): - self.data = self.create_data() - - def create_data(self): - seq0 = Sequences(str(get_shared_data( "chene_sessile_15pa.seq"))) - vec10 = Vectors(seq0) - return vec10 - - def test_estimate_mixture(self): - mixt20 = Estimate(ExtractHistogram(self.data, 2), - "MIXTURE", "NB", "NB", "NB", "NB", - NbComponent="Estimated") - assert mixt20.nb_component == 2 - - def 
test_estimate_mixture2(self): - mixt20 = Estimate(ExtractHistogram(self.data, 5), - "MIXTURE", "NB", "NB", "NB", "NB", - NbComponent="Estimated") - assert mixt20.nb_component == 3 - -class Test_Estimate_VARIABLE_ORDER_MARKOV(): - - def __init__(self): - self.sequence = seq10 - self.type = "VARIABLE_ORDER_MARKOV" - - def test_estimate(self): - mc10 = Estimate(self.sequence, self.type, "Ordinary", - MaxOrder=5, GlobalInitialTransition=True) - - def test_estimate1(self): - mc11 = Estimate(self.sequence , self.type, "Ordinary", - MaxOrder=5, GlobalInitialTransition=False) - - def test_estimate2(self): - mc12 = Estimate(self.sequence, self.type, "Ordinary", - Algorithm="LocalBIC", Threshold=10., - MaxOrder=5, GlobalInitialTransition=False, - GlobalSample=False) - - def test_estimate3(self): - mc13 = Estimate(self.sequence, self.type, "Ordinary", - Algorithm="Context", Threshold=1., - MaxOrder=5, GlobalInitialTransition=False, - GlobalSample=False) - def test_estimate4(self): - for Algorithm in ["CTM_BIC", "CTM_KT", "Context"]: - mc13 = Estimate(self.sequence, self.type, "Ordinary", - Algorithm=Algorithm, - MaxOrder=5, GlobalInitialTransition=False, - GlobalSample=False) - def test_estimate_error1(self): - """test that Estimator and Algorith=CTM_KT are incompatible""" - try: - mc13 = Estimate(self.sequence, self.type, "Ordinary", - Algorithm="CTM_KT", Estimator="Laplace", - MaxOrder=5, GlobalInitialTransition=False, - GlobalSample=False) - assert False - except: - assert True - - -class Test_Estimate_VARIABLE_ORDER_MARKOV_from_markovian(): - def test_estimate(self): - mc11 = Estimate(seq10 , "VARIABLE_ORDER_MARKOV", "Ordinary", - MaxOrder=5, GlobalInitialTransition=False) - mc2 = Estimate(seq2, "VARIABLE_ORDER_MARKOV", - mc11, GlobalInitialTransition=False) - -class Test_Estimate_HIDDEN_VARIABLE_ORDER_MARKOV(): - def test_estimate(self): - seq1 = Sequences(str(get_shared_data('sequences1.seq'))) - hvom_sample = 
HiddenVariableOrderMarkov(str(get_shared_data("dupreziana21.hc"))) - hmc_estimated = Estimate(seq1, "HIDDEN_VARIABLE_ORDER_MARKOV", hvom_sample, - GlobalInitialTransition=True, NbIteration=80) - assert hmc_estimated - -class Test_Estimate_HIDDEN_SEMI_MARKOV(): - - def __init__(self): - self.data = HiddenSemiMarkovData() - self.sequence = Sequences(str(get_shared_data( "wij1.seq"))) - - - def test_estimate(self): - seq = self.sequence - # data is a hsm class - Estimate(seq, "HIDDEN_SEMI-MARKOV", self.data) - - -class Test_Estimate_SEMI_MARKOV(): - - def __init__(self): - #self.data = SemiMarkovData() - self.sequence = Sequences(str(get_shared_data( "wij1.seq"))) - - def _test_estimate(self): - seq = self.sequence - # data is a hsm class - Estimate(seq, "SEMI-MARKOV", "Ordinary") - - -class Test_Estimate_time_events(): +@pytest.fixture +def create_data_estimate_histogram(): + seq0 = Sequences(str(get_shared_data("chene_sessile_15pa.seq"))) + return Vectors(seq0) + + +def test_estimate_mixture(create_data_estimate_histogram): + mixt20 = Estimate( + ExtractHistogram(create_data_estimate_histogram, 2), + "MIXTURE", + "NB", + "NB", + "NB", + "NB", + NbComponent="Estimated", + ) + assert mixt20.nb_component == 2 + + +def test_estimate_mixture2(create_data_estimate_histogram): + mixt20 = Estimate( + ExtractHistogram(create_data_estimate_histogram, 5), + "MIXTURE", + "NB", + "NB", + "NB", + "NB", + NbComponent="Estimated", + ) + assert mixt20.nb_component == 3 + + +sequence = seq10 +estimate_type = "VARIABLE_ORDER_MARKOV" + + +def test_estimate(): + mc10 = Estimate( + sequence, + estimate_type, + "Ordinary", + MaxOrder=5, + GlobalInitialTransition=True, + ) + + +def test_estimate1(): + mc11 = Estimate( + sequence, + estimate_type, + "Ordinary", + MaxOrder=5, + GlobalInitialTransition=False, + ) + + +def test_estimate2(): + mc12 = Estimate( + sequence, + estimate_type, + "Ordinary", + Algorithm="LocalBIC", + Threshold=10.0, + MaxOrder=5, + GlobalInitialTransition=False, + 
GlobalSample=False, + ) + + +def test_estimate3(): + mc13 = Estimate( + sequence, + estimate_type, + "Ordinary", + Algorithm="Context", + Threshold=1.0, + MaxOrder=5, + GlobalInitialTransition=False, + GlobalSample=False, + ) + + +def test_estimate4(): + for Algorithm in ["CTM_BIC", "CTM_KT", "Context"]: + mc13 = Estimate( + sequence, + estimate_type, + "Ordinary", + Algorithm=Algorithm, + MaxOrder=5, + GlobalInitialTransition=False, + GlobalSample=False, + ) + + +def test_estimate_error1(): + """test that Estimator and Algorith=CTM_KT are incompatible""" + try: + mc13 = Estimate( + sequence, + estimate_type, + "Ordinary", + Algorithm="CTM_KT", + Estimator="Laplace", + MaxOrder=5, + GlobalInitialTransition=False, + GlobalSample=False, + ) + assert False + except: + assert True + + +def Test_Estimate_VARIABLE_ORDER_MARKOV_from_markovian(): + mc11 = Estimate( + seq10, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + MaxOrder=5, + GlobalInitialTransition=False, + ) + mc2 = Estimate(seq2, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) + + +@pytest.fixture +def create_sequence_estimate_variable_order_markov(): + return Sequences(str(get_shared_data("sequences1.seq"))) + + +@pytest.fixture +def create_hvom_estimate_hidden_variable_order_markov(): + return HiddenVariableOrderMarkov(str(get_shared_data("dupreziana21.hc"))) + + +def test_estimate_hidden_variable_order_markov( + create_sequence_estimate_variable_order_markov, + create_hvom_estimate_hidden_variable_order_markov, +): + hmc_estimated = Estimate( + create_sequence_estimate_variable_order_markov, + "HIDDEN_VARIABLE_ORDER_MARKOV", + create_hvom_estimate_hidden_variable_order_markov, + GlobalInitialTransition=True, + NbIteration=80, + ) + assert hmc_estimated + + +@pytest.fixture +def create_data_estimate_hidden_semi_markov(): + return HiddenSemiMarkovData() + + +@pytest.fixture +def create_sequence_estimate_hidden_semi_markov(): + return Sequences(str(get_shared_data("wij1.seq"))) + + +def 
test_estimate_hidden_semi_markov( + create_data_estimate_hidden_semi_markov, create_sequence_estimate_hidden_semi_markov +): + # data is a hsm class + Estimate( + create_sequence_estimate_hidden_semi_markov, + "HIDDEN_SEMI-MARKOV", + create_data_estimate_hidden_semi_markov, + ) + + +def test_estimate_semi_markov(): + sequence = Sequences(str(get_shared_data("wij1.seq"))) + Estimate(sequence, "SEMI-MARKOV", "Ordinary") + + +def test_estimate_time_events(): """test not yet implemented""" pass -class Test_Estimate_tops(): - """tests not yet implemented""" - - def __init__(self): - self.data = TopsData() - - def test_estimate(self): - Estimate(self.data, MinPosition=1, MaxPosition=10) +@pytest.fixture +def create_data_estimate_tops(): + return TopsData() -if __name__ == "__main__": - runTestClass(Test_Estimate_HIDDEN_VARIABLE_ORDER_MARKOV()) - runTestClass(Test_Estimate_VARIABLE_ORDER_MARKOV_from_markovian()) - runTestClass(Test_Estimate_VARIABLE_ORDER_MARKOV()) - runTestClass(Test_Estimate_Histogram()) - runTestClass(Test_Estimate_tops()) - runTestClass(Test_Estimate_HIDDEN_SEMI_MARKOV()) - runTestClass(Test_Estimate_SEMI_MARKOV()) +def test_estimate_tops(create_data_estimate_tops): + Estimate(create_data_estimate_tops, MinPosition=1, MaxPosition=10) diff --git a/test/test_exploratory.py b/test/test_exploratory.py index 5e482f9..1f91190 100644 --- a/test/test_exploratory.py +++ b/test/test_exploratory.py @@ -14,63 +14,101 @@ # ######################################################################### """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * -from openalea.sequence_analysis.estimate import Estimate -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import ( + Compare, + ContingencyTable, + Cluster, + Display, + ExtractHistogram, + Plot, + Regression, + SelectIndividual, + SelectVariable, + Sequences, + ValueSelect, + VarianceAnalysis, + Vectors, + VectorDistance, +) +from .tools import 
robust_path as get_shared_data + def test1(): - seq0 = Sequences(str(get_shared_data("chene_sessile_15pa.seq"))) Plot(seq0, ViewPoint="Data") - + # change of unit for the variable diameter of the annual shoot - + marginal3 = ExtractHistogram(seq0, "Value", 3) Plot(Cluster(marginal3, "Information", 0.75)) Plot(Cluster(marginal3, "Information", 0.61)) Plot(Cluster(marginal3, "Step", 10)) - + vec10 = Vectors(seq0) - + # plot of the average sequence - #BUG : TODO - #Plot(Regression(vec10, "MovingAverage", 1, 2, [1])) - + # BUG : TODO + # Plot(Regression(vec10, "MovingAverage", 1, 2, [1])) + vec95 = ValueSelect(vec10, 1, 95) vec96 = ValueSelect(vec10, 1, 96) vec97 = ValueSelect(vec10, 1, 97) - + VarianceAnalysis(vec10, 1, 2, "N") - - + print(type(ExtractHistogram(vec95, 2))) - - #BUG : TODO - #Compare(ExtractHistogram(vec95, 2), ExtractHistogram(vec96, 2), ExtractHistogram(vec97, 2), "N") - Plot(ExtractHistogram(vec95, 2), ExtractHistogram(vec96, 2), ExtractHistogram(vec97, 2)) - + + # BUG : TODO + # Compare(ExtractHistogram(vec95, 2), ExtractHistogram(vec96, 2), ExtractHistogram(vec97, 2), "N") + Plot( + ExtractHistogram(vec95, 2), + ExtractHistogram(vec96, 2), + ExtractHistogram(vec97, 2), + ) + ContingencyTable(vec10, 1, 4) - + # one-way variance analysis based on ranks - + VarianceAnalysis(vec10, 1, 4, "O") - #BUG : TODO - #Compare(ExtractHistogram(vec95, 4), ExtractHistogram(vec96, 4), ExtractHistogram(vec97, 4), "O") - Plot(ExtractHistogram(vec95, 4), ExtractHistogram(vec96, 4), ExtractHistogram(vec97, 4)) - - Plot(ExtractHistogram(vec95, 5), ExtractHistogram(vec96, 5), ExtractHistogram(vec97, 5)) - Plot(ExtractHistogram(vec95, 6), ExtractHistogram(vec96, 6), ExtractHistogram(vec97, 6)) - + # BUG : TODO + # Compare(ExtractHistogram(vec95, 4), ExtractHistogram(vec96, 4), ExtractHistogram(vec97, 4), "O") + Plot( + ExtractHistogram(vec95, 4), + ExtractHistogram(vec96, 4), + ExtractHistogram(vec97, 4), + ) + + Plot( + ExtractHistogram(vec95, 5), + 
ExtractHistogram(vec96, 5), + ExtractHistogram(vec97, 5), + ) + Plot( + ExtractHistogram(vec95, 6), + ExtractHistogram(vec96, 6), + ExtractHistogram(vec97, 6), + ) + vec11 = ValueSelect(vec10, 4, 1) vec12 = ValueSelect(vec10, 4, 2) vec13 = ValueSelect(vec10, 4, 3, 4) - - Plot(ExtractHistogram(vec11, 2), ExtractHistogram(vec12, 2), ExtractHistogram(vec13, 2)) - Plot(ExtractHistogram(vec11, 5), ExtractHistogram(vec12, 5), ExtractHistogram(vec13, 5)) - - #BUG : TODO + + Plot( + ExtractHistogram(vec11, 2), + ExtractHistogram(vec12, 2), + ExtractHistogram(vec13, 2), + ) + Plot( + ExtractHistogram(vec11, 5), + ExtractHistogram(vec12, 5), + ExtractHistogram(vec13, 5), + ) + + # BUG : TODO """mixt20 = Estimate(ExtractHistogram(vec10, 2), "MIXTURE", "NB", "NB", "NB", "NB", NbComponent="Estimated") Display(mixt20) Plot(mixt20) @@ -79,40 +117,187 @@ def test1(): mixt21 = Estimate(ExtractHistogram(vec10, 5), "MIXTURE", "NB", "NB", "NB", "NB", NbComponent="Estimated") """ vec9596 = ValueSelect(vec10, 1, 95, 96) - Plot(ExtractHistogram(ValueSelect(vec9596, 4, 1), 6), ExtractHistogram(ValueSelect(vec9596, 4, 2), 6), ExtractHistogram(ValueSelect(vec9596, 4, 3, 4), 6)) - + Plot( + ExtractHistogram(ValueSelect(vec9596, 4, 1), 6), + ExtractHistogram(ValueSelect(vec9596, 4, 2), 6), + ExtractHistogram(ValueSelect(vec9596, 4, 3, 4), 6), + ) + regress10 = Regression(vec10, "Linear", 5, 2) Display(regress10) Plot(regress10) - + # nonparametric regression (loess smoother) - - regress11 = Regression(vec10, "NearestNeighbours", 5, 2, 0.3) - + + regress11 = Regression(vec10, "NearestNeighbours", 5, 2, 0.3) + regress12 = Regression(vec9596, "Linear", 5, 6) regress13 = Regression(vec9596, "NearestNeighbours", 5, 6, 0.5) - + vec15 = SelectVariable(vec10, [1, 3, 6], Mode="Reject") - + # computation of a distance matrix using a standardization procedure - - #BUG : TODO - #matrix10 = Compare(vec15, VectorDistance("N", "N", "N")) - + + # BUG : TODO + # matrix10 = Compare(vec15, VectorDistance("N", 
"N", "N")) + # clustering using a partitioning method - - #BUG : TODO - #Display(Clustering(matrix10, "Partition", 2)) - - vec151 = SelectIndividual(vec10, [69, 48, 41, 44, 32, 47, 81, 95, 11, 36, 75, 108, 56, 83, 38, 98, 113, 134, 110, 101, 77, 35, 74, 80, 50, 24, 89, 128, 5, 45, 8, 116, 119, 132, 61, 78, 53, 29, 131, 65, 90, 96, 104, 20, 86, 66, 42, 68, 125, 14, 23, 54, 33, 26, 71, 129, 102, 51, 70, 111, 138, 19, 127, 62, 117, 137, 2, 28, 17]) - vec152 = SelectIndividual(vec10, [100, 13, 133, 105, 72, 9, 93, 109, 30, 115, 63, 7, 55, 37, 15, 114, 106, 46, 73, 18, 3, 87, 58, 43, 60, 76, 52, 6, 39, 31, 12, 99, 121, 123, 22, 79, 94, 88, 21, 97, 25, 40, 57, 136, 67, 49, 10, 4, 120, 92, 27, 91, 64, 124, 16, 130, 84, 107, 126, 103, 122, 112, 59, 1, 82, 34, 135, 118, 85]) + + # BUG : TODO + # Display(Clustering(matrix10, "Partition", 2)) + + vec151 = SelectIndividual( + vec10, + [ + 69, + 48, + 41, + 44, + 32, + 47, + 81, + 95, + 11, + 36, + 75, + 108, + 56, + 83, + 38, + 98, + 113, + 134, + 110, + 101, + 77, + 35, + 74, + 80, + 50, + 24, + 89, + 128, + 5, + 45, + 8, + 116, + 119, + 132, + 61, + 78, + 53, + 29, + 131, + 65, + 90, + 96, + 104, + 20, + 86, + 66, + 42, + 68, + 125, + 14, + 23, + 54, + 33, + 26, + 71, + 129, + 102, + 51, + 70, + 111, + 138, + 19, + 127, + 62, + 117, + 137, + 2, + 28, + 17, + ], + ) + vec152 = SelectIndividual( + vec10, + [ + 100, + 13, + 133, + 105, + 72, + 9, + 93, + 109, + 30, + 115, + 63, + 7, + 55, + 37, + 15, + 114, + 106, + 46, + 73, + 18, + 3, + 87, + 58, + 43, + 60, + 76, + 52, + 6, + 39, + 31, + 12, + 99, + 121, + 123, + 22, + 79, + 94, + 88, + 21, + 97, + 25, + 40, + 57, + 136, + 67, + 49, + 10, + 4, + 120, + 92, + 27, + 91, + 64, + 124, + 16, + 130, + 84, + 107, + 126, + 103, + 122, + 112, + 59, + 1, + 82, + 34, + 135, + 118, + 85, + ], + ) Plot(ExtractHistogram(vec151, 4), ExtractHistogram(vec152, 4)) - + matrix11 = Compare(vec15, VectorDistance("N", "O", "N")) - + vec16 = SelectVariable(vec9596, [1, 3], Mode="Reject") matrix12 = 
Compare(vec16, VectorDistance("N", "N", "N", "N")) matrix13 = Compare(vec16, VectorDistance("N", "O", "N", "N")) - -if __name__ == "__main__": - test1() diff --git a/test/test_exploratory2.py b/test/test_exploratory2.py index d3c1e34..60f48da 100644 --- a/test/test_exploratory2.py +++ b/test/test_exploratory2.py @@ -21,47 +21,63 @@ # ######################################################################### """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * +from openalea.sequence_analysis import ( + ComputeCorrelation, + Display, + ExtractHistogram, + Merge, + Plot, + RemoveRun, + SegmentationExtract, + Sequences, + Transcode, +) from openalea.sequence_analysis.estimate import Estimate as Estimate -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data + def test_exploratory(): - seq19 = Sequences(str(get_shared_data( "dupreziana_20a2.seq"))) + seq19 = Sequences(str(get_shared_data("dupreziana_20a2.seq"))) seq20 = RemoveRun(seq19, 0, "End") histo201 = ExtractHistogram(seq20, "Recurrence", 1) histo202 = ExtractHistogram(seq20, "Recurrence", 2) - seq38 = Sequences(str(get_shared_data( "dupreziana_40a2.seq"))) + seq38 = Sequences(str(get_shared_data("dupreziana_40a2.seq"))) seq39 = RemoveRun(seq38, 2, 0, "End") seq40 = SegmentationExtract(seq39, 1, 2) histo401 = ExtractHistogram(seq40, "Recurrence", 1) histo402 = ExtractHistogram(seq40, "Recurrence", 2) - seq58 = Sequences(str(get_shared_data( "dupreziana_60a2.seq"))) + seq58 = Sequences(str(get_shared_data("dupreziana_60a2.seq"))) seq59 = RemoveRun(seq58, 2, 0, "End") - seq60 = LengthSelect(SegmentationExtract(seq59, 1, 2), 1, Mode="Reject") + seq60 = LengthSelect(SegmentationExtract(seq59, 1, 2), 1, Mode="Reject") histo601 = ExtractHistogram(seq60, "Recurrence", 1) histo602 = ExtractHistogram(seq60, "Recurrence", 2) - seq78 = Sequences(str(get_shared_data( "dupreziana_80a2.seq"))) + seq78 = 
Sequences(str(get_shared_data("dupreziana_80a2.seq"))) seq79 = RemoveRun(seq78, 2, 0, "End") seq80 = SegmentationExtract(seq79, 1, 2) histo801 = ExtractHistogram(seq80, "Recurrence", 1) histo802 = ExtractHistogram(seq80, "Recurrence", 2) - Plot(histo201, histo401, histo601, histo801) Plot(histo202, histo402, histo602, histo802) - Plot(ExtractHistogram(seq20, "Length"), ExtractHistogram(seq40, "Length"), ExtractHistogram(seq60, "Length"), ExtractHistogram(seq80, "Length")) - + Plot( + ExtractHistogram(seq20, "Length"), + ExtractHistogram(seq40, "Length"), + ExtractHistogram(seq60, "Length"), + ExtractHistogram(seq80, "Length"), + ) + seq10 = Merge(seq20, seq40, seq60, seq80) Display(seq10, ViewPoint="Data") - #Plot(seq10, "Intensity") - #Plot(seq10, "Recurrence") - #Plot(seq10, "Sojourn") + # Plot(seq10, "Intensity") + # Plot(seq10, "Recurrence") + # Plot(seq10, "Sojourn") # plot of a sample Spearman (rank based) autocorrelation function @@ -69,22 +85,27 @@ def test_exploratory(): seq11 = Transcode(seq10, [0, 1, 0]) seq12 = Transcode(seq10, [0, 0, 1]) - acf1 = Merge(ComputeCorrelation(seq11, MaxLag=15, Normalization="Exact"),\ - ComputeCorrelation(seq12, MaxLag=15, Normalization="Exact")) + acf1 = Merge( + ComputeCorrelation(seq11, MaxLag=15, Normalization="Exact"), + ComputeCorrelation(seq12, MaxLag=15, Normalization="Exact"), + ) Plot(acf1) Display(acf1) - acf2 = Merge(ComputeCorrelation(seq11, Type="Spearman", MaxLag=15, Normalization="Exact"),\ - ComputeCorrelation(seq12, Type="Spearman", MaxLag=15, Normalization="Exact")) - acf3 = Merge(ComputeCorrelation(seq11, Type="Kendall", MaxLag=15),\ - ComputeCorrelation(seq12, Type="Kendall", MaxLag=15)) + acf2 = Merge( + ComputeCorrelation(seq11, Type="Spearman", MaxLag=15, Normalization="Exact"), + ComputeCorrelation(seq12, Type="Spearman", MaxLag=15, Normalization="Exact"), + ) + acf3 = Merge( + ComputeCorrelation(seq11, Type="Kendall", MaxLag=15), + ComputeCorrelation(seq12, Type="Kendall", MaxLag=15), + ) + # model 
selection approach: estimation of both the parameters (initial probabilities and # transition probabilities) and the order (memory length) of a Markov chain -#todo -#mc10 = Estimate(seq10, "MARKOV", MaxOrder=4) -#Plot(mc10, "Intensity") -#Plot(mc10, "Recurrence") -if __name__ == "__main__": - test_exploratory() +# todo +# mc10 = Estimate(seq10, "MARKOV", MaxOrder=4) +# Plot(mc10, "Intensity") +# Plot(mc10, "Recurrence") diff --git a/test/test_exploratory3.py b/test/test_exploratory3.py index 41dcbb5..5d95688 100644 --- a/test/test_exploratory3.py +++ b/test/test_exploratory3.py @@ -21,69 +21,88 @@ # ######################################################################### """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import ( + ComputeCorrelation, + Display, + ExtractHistogram, + Merge, + Plot, + RemoveRun, + SegmentationExtract, + Sequences, + Transcode, +) +from .tools import runTestClass, robust_path as get_shared_data + def test1(): seq19 = Sequences(str(get_shared_data("dupreziana_20a2.seq"))) seq20 = RemoveRun(seq19, 0, "End") histo201 = ExtractHistogram(seq20, "Recurrence", 1) histo202 = ExtractHistogram(seq20, "Recurrence", 2) - + seq38 = Sequences(str(get_shared_data("dupreziana_40a2.seq"))) seq39 = RemoveRun(seq38, 2, 0, "End") seq40 = SegmentationExtract(seq39, 1, 2) histo401 = ExtractHistogram(seq40, "Recurrence", 1) histo402 = ExtractHistogram(seq40, "Recurrence", 2) - + seq58 = Sequences(str(get_shared_data("dupreziana_60a2.seq"))) seq59 = RemoveRun(seq58, 2, 0, "End") - seq60 = LengthSelect(SegmentationExtract(seq59, 1, 2), 1, Mode="Reject") + seq60 = LengthSelect(SegmentationExtract(seq59, 1, 2), 1, Mode="Reject") histo601 = ExtractHistogram(seq60, "Recurrence", 1) histo602 = ExtractHistogram(seq60, "Recurrence", 2) - + seq78 = Sequences(str(get_shared_data("dupreziana_80a2.seq"))) seq79 = RemoveRun(seq78, 2, 0, "End") seq80 = 
SegmentationExtract(seq79, 1, 2) histo801 = ExtractHistogram(seq80, "Recurrence", 1) histo802 = ExtractHistogram(seq80, "Recurrence", 2) - + Plot(histo201, histo401, histo601, histo801) Plot(histo202, histo402, histo602, histo802) - Plot(ExtractHistogram(seq20, "Length"), ExtractHistogram(seq40, "Length"), ExtractHistogram(seq60, "Length"), ExtractHistogram(seq80, "Length")) - + Plot( + ExtractHistogram(seq20, "Length"), + ExtractHistogram(seq40, "Length"), + ExtractHistogram(seq60, "Length"), + ExtractHistogram(seq80, "Length"), + ) + seq10 = Merge(seq20, seq40, seq60, seq80) Display(seq10, ViewPoint="Data") Plot(seq10, "Intensity") Plot(seq10, "Recurrence") Plot(seq10, "Sojourn") - + # plot of a sample Spearman (rank based) autocorrelation function - + Plot(ComputeCorrelation(seq10, Type="Spearman", MaxLag=15, Normalization="Exact")) - + seq11 = Transcode(seq10, [0, 1, 0]) seq12 = Transcode(seq10, [0, 0, 1]) - acf1 = Merge(ComputeCorrelation(seq11, MaxLag=15, Normalization="Exact"),\ - ComputeCorrelation(seq12, MaxLag=15, Normalization="Exact")) + acf1 = Merge( + ComputeCorrelation(seq11, MaxLag=15, Normalization="Exact"), + ComputeCorrelation(seq12, MaxLag=15, Normalization="Exact"), + ) Plot(acf1) Display(acf1) - - acf2 = Merge(ComputeCorrelation(seq11, Type="Spearman", MaxLag=15, Normalization="Exact"),\ - ComputeCorrelation(seq12, Type="Spearman", MaxLag=15, Normalization="Exact")) - acf3 = Merge(ComputeCorrelation(seq11, Type="Kendall", MaxLag=15),\ - ComputeCorrelation(seq12, Type="Kendall", MaxLag=15)) - + + acf2 = Merge( + ComputeCorrelation(seq11, Type="Spearman", MaxLag=15, Normalization="Exact"), + ComputeCorrelation(seq12, Type="Spearman", MaxLag=15, Normalization="Exact"), + ) + acf3 = Merge( + ComputeCorrelation(seq11, Type="Kendall", MaxLag=15), + ComputeCorrelation(seq12, Type="Kendall", MaxLag=15), + ) + # model selection approach: estimation of both the parameters (initial probabilities and # transition probabilities) and the order (memory 
length) of a Markov chain - - #todo - #mc10 = Estimate(seq10, MARKOV, MaxOrder=4) - + + # todo + # mc10 = Estimate(seq10, MARKOV, MaxOrder=4) + # Plot(mc10, "Intensity") # Plot(mc10, "Recurrence") - -if __name__ == "__main__": - test1() - diff --git a/test/test_exploratory4.py b/test/test_exploratory4.py index 474c0ce..bec7c5e 100644 --- a/test/test_exploratory4.py +++ b/test/test_exploratory4.py @@ -17,63 +17,109 @@ # ######################################################################### """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * -from openalea.sequence_analysis.compare import Compare as Compare -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import ( + Clustering, + Compare, + Display, + ExtractHistogram, + ExtractVectors, + MergeVariable, + Plot, + SegmentationExtract, + Sequences, + Shift, + VectorDistance, +) +from .tools import robust_path as get_shared_data + def test1(): + seq1 = Sequences(str(get_shared_data("dupreziana_a1.seq"))) - seq1 = Sequences(str(get_shared_data( "dupreziana_a1.seq"))) - Display(seq1, ViewPoint="Data", Format="Line") - - vec20 = MergeVariable(ExtractVectors(seq1, "NbOccurrence", 1, 3), ExtractVectors(seq1, "Length")) + + vec20 = MergeVariable( + ExtractVectors(seq1, "NbOccurrence", 1, 3), ExtractVectors(seq1, "Length") + ) Display(vec20) - #todo - #Plot(vec20,1) + # todo + # Plot(vec20,1) Plot(vec20) - + seq2 = Shift(seq1, 1, -3) - + seq3 = SegmentationExtract(seq1, 1, 3) seq4 = SegmentationExtract(seq1, 1, 4) - Plot(ExtractHistogram(seq3, "Recurrence", 1), ExtractHistogram(seq4, "Recurrence", 1)) + Plot( + ExtractHistogram(seq3, "Recurrence", 1), ExtractHistogram(seq4, "Recurrence", 1) + ) Plot(ExtractHistogram(seq3, "Sojourn", 1), ExtractHistogram(seq4, "Sojourn", 1)) - Plot(ExtractHistogram(seq3, "Recurrence", 2), ExtractHistogram(seq4, "Recurrence", 2)) + Plot( + ExtractHistogram(seq3, "Recurrence", 2), ExtractHistogram(seq4, "Recurrence", 2) + ) 
Plot(ExtractHistogram(seq3, "Sojourn", 2), ExtractHistogram(seq4, "Sojourn", 2)) - - Plot(ExtractHistogram(seq4, "Recurrence", 2), ExtractHistogram(seq4, "Recurrence", 3), ExtractHistogram(seq4, "Recurrence", 4)) - + + Plot( + ExtractHistogram(seq4, "Recurrence", 2), + ExtractHistogram(seq4, "Recurrence", 3), + ExtractHistogram(seq4, "Recurrence", 4), + ) + matrix20 = Compare(seq1, VectorDistance("N", "N")) Plot(matrix20) Display(Clustering(matrix20, "Partition", 2)) Clustering(matrix20, "Hierarchy") - + matrix21 = Compare(seq1, VectorDistance("O", "O")) Plot(matrix21) Display(Clustering(matrix21, "Partition", 2)) Clustering(matrix21, "Hierarchy") - - seq11 = SelectIndividual(seq2, [18, 9, 10, 31, 6, 14, 29, 16, 1, 12, 5, 7, 25, 22, 17, 30, 13, 4, 21, 27, 20, 24]) + + seq11 = SelectIndividual( + seq2, + [ + 18, + 9, + 10, + 31, + 6, + 14, + 29, + 16, + 1, + 12, + 5, + 7, + 25, + 22, + 17, + 30, + 13, + 4, + 21, + 27, + 20, + 24, + ], + ) seq12 = SelectIndividual(seq2, [28, 19, 32, 23, 26, 11, 3, 15, 8, 33, 2]) - #todo - #Plot(ExtractHistogram(seq11, "FirstOccurrence", 1, 0), ExtractHistogram(seq12, "FirstOccurrence", 1, 0)) - ComparisonTest("W", ExtractHistogram(seq11, "Length"), ExtractHistogram(seq12, "Length")) + # todo + # Plot(ExtractHistogram(seq11, "FirstOccurrence", 1, 0), ExtractHistogram(seq12, "FirstOccurrence", 1, 0)) + ComparisonTest( + "W", ExtractHistogram(seq11, "Length"), ExtractHistogram(seq12, "Length") + ) Plot(ExtractHistogram(seq11, "Length"), ExtractHistogram(seq12, "Length")) - - #todo - #Plot(seq2, "Intensity", 1) - - #Plot(seq2, "Intensity", 2) + + # todo + # Plot(seq2, "Intensity", 1) + + # Plot(seq2, "Intensity", 2) seq5 = RemoveRun(seq2, 2, 0, "End") Plot(seq5, "Intensity", 1) - - + if __name__ == "__main__": test1() - - - diff --git a/test/test_exploratory5.py b/test/test_exploratory5.py index a196e53..1bf981c 100644 --- a/test/test_exploratory5.py +++ b/test/test_exploratory5.py @@ -18,65 +18,80 @@ # 
######################################################################### """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import ( + Cluster, + Clustering, + Compare, + Display, + ExtractHistogram, + Merge, + Plot, + Reverse, + Sequences, + VectorDistance, +) +from .tools import robust_path as get_shared_data + def test1(): - seq20 = Sequences(str(get_shared_data("belren1.seq"))) seq21 = Sequences(str(get_shared_data("elstar1.seq"))) seq22 = Sequences(str(get_shared_data("fuji1.seq"))) seq23 = Sequences(str(get_shared_data("gala1.seq"))) seq24 = Sequences(str(get_shared_data("granny1.seq"))) seq25 = Sequences(str(get_shared_data("reinet1.seq"))) - + Display(seq25, ViewPoint="Data") Plot(seq25, "Intensity") Plot(seq25, "Sojourn") - + seq26 = Reverse(seq25) Plot(seq26, "Intensity") Plot(seq26, "FirstOccurrence") - + # Sojourn time (run length) distributions - + seq30 = Merge(seq20, seq21, seq22, seq23, seq24, seq25) Plot(seq30, "Sojourn") - Plot(ExtractHistogram(seq30, "Sojourn", 1), ExtractHistogram(seq30, "Sojourn", 2), ExtractHistogram(seq30, "Sojourn", 3), ExtractHistogram(seq30, "Sojourn", 4)) - - #todo - #mc30 = Estimate(seq30, "MARKOV", MaxOrder=4) - - #todo does not work in aml either + Plot( + ExtractHistogram(seq30, "Sojourn", 1), + ExtractHistogram(seq30, "Sojourn", 2), + ExtractHistogram(seq30, "Sojourn", 3), + ExtractHistogram(seq30, "Sojourn", 4), + ) + + # todo + # mc30 = Estimate(seq30, "MARKOV", MaxOrder=4) + + # todo does not work in aml either # Plot(mc30, "Sojourn") - #Display(Estimate(seq30, "MARKOV")) - + # Display(Estimate(seq30, "MARKOV")) + seq31 = Cluster(seq30, "Limit", [1, 4]) - #todo - #mc31 = Estimate(seq31, "MARKOV", Order=2) - + # todo + # mc31 = Estimate(seq31, "MARKOV", Order=2) + # Plot(mc31, "Sojourn") # Display(Estimate(seq31, "MARKOV")) - + # comparison of sequences by dynamic programming algorithms - + seq32 = 
Merge(seq20, seq25) matrix30 = Compare(seq32) matrix31 = Compare(seq32, VectorDistance("S")) matrix32 = Compare(seq32, VectorDistance("S"), Transposition=True) - matrix33 = Compare(seq32, VectorDistance(str(get_shared_data("test_align1.a"))), Transposition=True) - - #todo + matrix33 = Compare( + seq32, VectorDistance(str(get_shared_data("test_align1.a"))), Transposition=True + ) + + # todo Display(Clustering(matrix33, "Partition", 2)) Clustering(matrix33, "Hierarchy") - + Compare(seq25, TestSequence=9, RefSequence=1) Compare(seq25, VectorDistance("S"), TestSequence=9, RefSequence=1) - Compare(seq25, VectorDistance("S"), TestSequence=9, RefSequence=1, Transposition=True) - - -if __name__ == "__main__": - test1() - - + Compare( + seq25, VectorDistance("S"), TestSequence=9, RefSequence=1, Transposition=True + ) diff --git a/test/test_exploratory6.py b/test/test_exploratory6.py index 494c085..7c26971 100644 --- a/test/test_exploratory6.py +++ b/test/test_exploratory6.py @@ -17,87 +17,130 @@ # ######################################################################### """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * -from openalea.sequence_analysis.compare import Compare as Compare -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis import ( + Clustering, + Compare, + ComputeCorrelation, + ComputeWhiteNoiseCorrelation, + Convolution, + Difference, + Display, + Distribution, + Merge, + MovingAverage, + Plot, + Regression, + SelectIndividual, + SelectVariable, + Sequences, + VariableScaling, + VectorDistance, + Vectors, +) +from .tools import robust_path as get_shared_data -def test1(): +def test1(): seq66 = Sequences(str(get_shared_data("laricio_date66.seq"))) Plot(seq66, ViewPoint="Data") - #Plot(Cumulate(seq66), ViewPoint="Data") - + # Plot(Cumulate(seq66), ViewPoint="Data") + vec66 = Vectors(seq66) regress66_1 = Regression(vec66, "MovingAverage", 1, 2, [1]) Plot(regress66_1) regress66_2 = 
Regression(vec66, "MovingAverage", 1, 3, [1]) - + regress66_23 = Regression(vec66, "NearestNeighbours", 2, 3, 0.3) Display(regress66_23) Plot(regress66_23) - - vec70 = Vectors(SelectIndividual(seq66, [1, 2, 3])) + + vec70 = Vectors(SelectIndividual(seq66, [1, 2, 3])) regress70_1 = Regression(vec70, "MovingAverage", 1, 2, [1]) Plot(regress70_1) regress70_2 = Regression(vec70, "MovingAverage", 1, 3, [1]) - - vec71 = Vectors(SelectIndividual(seq66, [4, 5, 6])) + + vec71 = Vectors(SelectIndividual(seq66, [4, 5, 6])) regress71_1 = Regression(vec71, "MovingAverage", 1, 2, [1]) Plot(regress71_1) regress71_2 = Regression(vec71, "MovingAverage", 1, 3, [1]) - - matrix66 = Compare(SelectVariable(seq66, 1, Mode="Reject"), VectorDistance("N", "N")) + + matrix66 = Compare( + SelectVariable(seq66, 1, Mode="Reject"), VectorDistance("N", "N") + ) Display(Clustering(matrix66, "Partition", 3)) Clustering(matrix66, "Hierarchy") - + # extraction of trends (slowly varying component) and residuals (rapidly varying component) # by symmetric smoothing filters and computation of sample autocorrelation functions from residuals - + seq67 = Difference(seq66) - acf11 = Merge(ComputeCorrelation(seq67, 2, MaxLag=10),\ - ComputeCorrelation(seq67, 3, MaxLag=10)) - acf11 = Merge(ComputeCorrelation(seq67, 2, MaxLag=10, Normalization="Exact"),\ - ComputeCorrelation(seq67, 3, MaxLag=10, Normalization="Exact")) + acf11 = Merge( + ComputeCorrelation(seq67, 2, MaxLag=10), ComputeCorrelation(seq67, 3, MaxLag=10) + ) + acf11 = Merge( + ComputeCorrelation(seq67, 2, MaxLag=10, Normalization="Exact"), + ComputeCorrelation(seq67, 3, MaxLag=10, Normalization="Exact"), + ) ComputeWhiteNoiseCorrelation(acf11, 1) Plot(acf11) - + # symmetric smoothing filters of half-width 3 - + filter1 = Convolution(Distribution("B", 0, 3, 0.2), Distribution("B", 0, 3, 0.8)) - filter2 = Convolution(Distribution("B", 0, 2, 0.2), Distribution("B", 0, 2, 0.5), Distribution("B", 0, 2, 0.8)) - filter3 = Convolution(Distribution("U", 0, 
2), Distribution("U", 0, 2), Distribution("U", 0, 2)) + filter2 = Convolution( + Distribution("B", 0, 2, 0.2), + Distribution("B", 0, 2, 0.5), + Distribution("B", 0, 2, 0.8), + ) + filter3 = Convolution( + Distribution("U", 0, 2), Distribution("U", 0, 2), Distribution("U", 0, 2) + ) filter4 = Convolution(Distribution("U", 0, 3), Distribution("U", 0, 3)) - Plot(filter1, filter2, Distribution("B", 0, 6, 0.5), filter3, filter4, Distribution("U", 0, 6)) - + Plot( + filter1, + filter2, + Distribution("B", 0, 6, 0.5), + filter3, + filter4, + Distribution("U", 0, 6), + ) + seq68 = MovingAverage(seq66, Distribution("B", 0, 6, 0.5), BeginEnd=True) - - seq69 = MovingAverage(VariableScaling(seq66, 3, 100), Distribution("B", 0, 6, 0.5), BeginEnd=True, Output="Residual") - acf12 = Merge(ComputeCorrelation(seq69, 2, MaxLag=10),\ - ComputeCorrelation(seq69, 3, MaxLag=10)) - acf12 = Merge(ComputeCorrelation(seq69, 2, MaxLag=10, Normalization="Exact"),\ - ComputeCorrelation(seq69, 3, MaxLag=10, Normalization="Exact")) + + seq69 = MovingAverage( + VariableScaling(seq66, 3, 100), + Distribution("B", 0, 6, 0.5), + BeginEnd=True, + Output="Residual", + ) + acf12 = Merge( + ComputeCorrelation(seq69, 2, MaxLag=10), ComputeCorrelation(seq69, 3, MaxLag=10) + ) + acf12 = Merge( + ComputeCorrelation(seq69, 2, MaxLag=10, Normalization="Exact"), + ComputeCorrelation(seq69, 3, MaxLag=10, Normalization="Exact"), + ) ComputeWhiteNoiseCorrelation(acf12, Distribution("B", 0, 6, 0.5)) Plot(acf12) - + seq70 = MovingAverage(seq66, [1, 1, 1], BeginEnd=True) - seq71 = MovingAverage(VariableScaling(seq66, 3, 100), [1, 1, 1], BeginEnd=True, Output="Residual") - acf13 = Merge(ComputeCorrelation(seq71, 2, MaxLag=10),\ - ComputeCorrelation(seq71, 3, MaxLag=10)) - acf13 = Merge(ComputeCorrelation(seq71, 2, MaxLag=10, Normalization="Exact"),\ - ComputeCorrelation(seq71, 3, MaxLag=10, Normalization="Exact")) + seq71 = MovingAverage( + VariableScaling(seq66, 3, 100), [1, 1, 1], BeginEnd=True, Output="Residual" + 
) + acf13 = Merge( + ComputeCorrelation(seq71, 2, MaxLag=10), ComputeCorrelation(seq71, 3, MaxLag=10) + ) + acf13 = Merge( + ComputeCorrelation(seq71, 2, MaxLag=10, Normalization="Exact"), + ComputeCorrelation(seq71, 3, MaxLag=10, Normalization="Exact"), + ) ComputeWhiteNoiseCorrelation(acf13, [1, 1, 1]) Plot(acf13) - - seq80 = Sequences(str(get_shared_data( "laricio_position66.seq")), OldFormat=True) - - #Plot(Cumulate(seq80), ViewPoint="Data") - - -if __name__ == "__main__": - test1() - + seq80 = Sequences(str(get_shared_data("laricio_position66.seq")), OldFormat=True) + # Plot(Cumulate(seq80), ViewPoint="Data") diff --git a/test/test_extract_distribution.py b/test/test_extract_distribution.py index 1ef8e11..729c26f 100644 --- a/test/test_extract_distribution.py +++ b/test/test_extract_distribution.py @@ -9,7 +9,7 @@ from openalea.stat_tool.data_transform import ExtractDistribution from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data def test_hidden_semi_markov(): @@ -35,4 +35,4 @@ def test_top_param(): test_hidden_semi_markov() test_renewal() test_semi_markov() - test_top_param() \ No newline at end of file + test_top_param() diff --git a/test/test_extract_histogram.py b/test/test_extract_histogram.py index 9e33925..898d0a8 100644 --- a/test/test_extract_histogram.py +++ b/test/test_extract_histogram.py @@ -10,7 +10,7 @@ from openalea.stat_tool.data_transform import ValueSelect, ExtractHistogram from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data seq = Sequences(str(get_shared_data("pin_laricio_7x.seq"))) seq_cluster = Cluster(seq, "Step", 1, 10) diff --git a/test/test_extract_parameter_index.py b/test/test_extract_parameter_index.py index ff61cc3..557c4f9 100644 --- a/test/test_extract_parameter_index.py +++ 
b/test/test_extract_parameter_index.py @@ -7,7 +7,7 @@ from openalea.sequence_analysis.data_transform import IndexParameterExtract from openalea.sequence_analysis.sequences import Sequences -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data def test1(): """FIXME markovian_sequences call""" diff --git a/test/test_extract_vectors.py b/test/test_extract_vectors.py index 9632c9e..9a55218 100644 --- a/test/test_extract_vectors.py +++ b/test/test_extract_vectors.py @@ -7,7 +7,7 @@ from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data seq0 = Sequences(str(get_shared_data("chene_sessile_15pa.seq"))) diff --git a/test/test_functional.py b/test/test_functional.py deleted file mode 100644 index 07f8768..0000000 --- a/test/test_functional.py +++ /dev/null @@ -1,11 +0,0 @@ -import runpy - - -def _test_functional1(): - runpy.run_module('functional1') - -def test_functional2(): - runpy.run_module('functional2') - -def _test_functional3(): - runpy.run_module('functional3') diff --git a/test/test_functional1.py b/test/test_functional1.py new file mode 100644 index 0000000..5dee186 --- /dev/null +++ b/test/test_functional1.py @@ -0,0 +1,157 @@ +"""functional tests + + +.. 
todo:: to be done +""" + +__revision__ = "$Id$" + +from openalea.stat_tool import * +from openalea.sequence_analysis import ( + ComputeAutoCorrelation, + ComputeCorrelation, + Estimate, + ExtractHistogram, + ExtractVectors, + HiddenVariableOrderMarkov, + LengthSelect, + Merge, + MergeVariable, + Plot, + RemoveRun, + SegmentationExtract, + Sequences, + Simulate, + Transcode, + WordCount, +) +from .tools import robust_path as get_shared_data + +seq1 = Sequences(str(get_shared_data("dupreziana_20a2.seq"))) # correct +seq2 = RemoveRun(seq1, 1, 0, "End") # correct + +histo21 = ExtractHistogram(seq2, "Recurrence", 1) # correct +histo22 = ExtractHistogram(seq2, "Recurrence", 2) # correct + +seq3 = Sequences(str(get_shared_data("dupreziana_40a2.seq"))) # correct +seq4_0 = RemoveRun(seq3, 2, 0, "End") # correct +seq4 = SegmentationExtract(seq4_0, 1, 2) # correct + + +seq5 = Sequences(str(get_shared_data("dupreziana_60a2.seq"))) # correct +seq6_0 = RemoveRun(seq5, 2, 0, "End") # correct +seq6 = LengthSelect(SegmentationExtract(seq6_0, 1, 2), 1, Mode="Reject") # correct + + +seq7 = Sequences(str(get_shared_data("dupreziana_80a2.seq"))) # correct +seq8_0 = RemoveRun(seq7, 2, 0, "End") # correct +seq8 = SegmentationExtract(seq8_0, 1, 2) # correct + + +seq10 = Merge(seq2, seq4, seq6, seq8) + +seq10_1 = RecurrenceTimeSequences(seq10, 1) +seq10_2 = RecurrenceTimeSequences(seq10, 2) + + +vec10 = MergeVariable( + ExtractVectors(seq10, "Length"), + ExtractVectors(seq10, "NbOccurrence", 1, 1), + ExtractVectors(seq10, "NbOccurrence", 1, 2), + ExtractVectors(seq10, "Cumul"), +) + +seq11 = Transcode(seq10, [0, 1, 0]) +seq12 = Transcode(seq10, [0, 0, 1]) + +acf1 = Merge(ComputeCorrelation(seq11, MaxLag=15), ComputeCorrelation(seq12, MaxLag=15)) + +acf2 = Merge( + ComputeCorrelation(seq11, Type="Spearman", MaxLag=15), + ComputeCorrelation(seq12, Type="Spearman", MaxLag=15), +) +acf3 = Merge( + ComputeCorrelation(seq11, Type="Kendall", MaxLag=15), + ComputeCorrelation(seq12, Type="Kendall", 
MaxLag=15), +) + + +WordCount(seq10, 3, BeginState=1, EndState=1, MinFrequency=10) +WordCount(seq10, 4, BeginState=2, EndState=2) +WordCount(seq10, 4, BeginState=2, EndState=1) + + +mc10 = Estimate( + seq10, "VARIABLE_ORDER_MARKOV", "Ordinary", MaxOrder=5, GlobalInitialTransition=True +) + +mc11 = Estimate( + seq10, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + MaxOrder=5, + GlobalInitialTransition=False, +) + +Plot(mc11, "Intensity") + +mc12 = Estimate( + seq10, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + Algorithm="LocalBIC", + Threshold=10.0, + MaxOrder=5, + GlobalInitialTransition=False, + GlobalSample=False, +) +mc13 = Estimate( + seq10, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + Algorithm="Context", + Threshold=1.0, + MaxOrder=5, + GlobalInitialTransition=False, + GlobalSample=False, +) + +acf11 = ComputeAutoCorrelation(mc11, 1, MaxLag=20) +acf12 = ComputeAutoCorrelation(mc11, 2, MaxLag=20) + +mc2 = Estimate(seq2, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) +mc4 = Estimate(seq4, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) +mc6 = Estimate(seq6, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) +mc8 = Estimate(seq8, "VARIABLE_ORDER_MARKOV", mc11, GlobalInitialTransition=False) + + +# TODO compare functions crashes sometimes +# matrix1 = Compare(Thresholding(mc2, MinProbability=0.001), seq10, Thresholding(mc4, MinProbability=0.001), seq10, Thresholding(mc6, MinProbability=0.001), seq10, Thresholding(mc8, MinProbability=0.001), seq10, 10000) +# matrix2 = Compare(Thresholding(mc2, MinProbability=0.001), seq2, Thresholding(mc4, MinProbability=0.001), seq4, Thresholding(mc6, MinProbability=0.001), seq6, Thresholding(mc8, MinProbability=0.001), seq8, 10000) + + +# Compare(seq10, Thresholding(mc2, MinProbability=0.001), Thresholding(mc4, MinProbability=.001), Thresholding(mc6, MinProbability=0.001), Thresholding(mc8, MinProbability=0.001)) + + +# test # +hmc9 = 
HiddenVariableOrderMarkov(str(get_shared_data("dupreziana21.hc"))) +hmc10 = Estimate( + seq10, + "HIDDEN_VARIABLE_ORDER_MARKOV", + hmc9, + GlobalInitialTransition=True, + NbIteration=80, +) +hmc11 = Estimate( + seq10, + "HIDDEN_VARIABLE_ORDER_MARKOV", + hmc9, + GlobalInitialTransition=False, + NbIteration=80, +) + + +acf21 = ComputeAutoCorrelation(hmc11, 1, 1, MaxLag=20) +acf22 = ComputeAutoCorrelation(hmc11, 1, 2, MaxLag=20) + +seq15 = Simulate(hmc11, 10000, seq10) diff --git a/test/test_functional2.py b/test/test_functional2.py new file mode 100644 index 0000000..f6f894b --- /dev/null +++ b/test/test_functional2.py @@ -0,0 +1,201 @@ +"""functional tests""" + +__revision__ = "$Id$" + + +from openalea.sequence_analysis import ( + Cluster, + Compare, + Display, + Estimate, + ExtractData, + ExtractHistogram, + HiddenSemiMarkov, + Merge, + Plot, + Reverse, + Sequences, + VectorDistance, +) +from .tools import robust_path as get_shared_data + + +seq20 = Sequences(str(get_shared_data("belren1.seq"))) +seq21 = Sequences(str(get_shared_data("elstar1.seq"))) +seq22 = Sequences(str(get_shared_data("fuji1.seq"))) +seq23 = Sequences(str(get_shared_data("gala1.seq"))) +seq24 = Sequences(str(get_shared_data("granny1.seq"))) +seq25 = Sequences(str(get_shared_data("reinet1.seq"))) +seq26 = Sequences(str(get_shared_data("wij1.seq"))) + +Display(seq25, ViewPoint="Data") +Plot(seq25, "Intensity") +Plot(seq25, "Sojourn") + +seq26 = Reverse(seq25) +Plot(seq26, "Intensity") +Plot(seq26, "FirstOccurrence") + +# Sojourn time (run length) distributions + +seq30 = Merge(seq20, seq21, seq22, seq23, seq24, seq25) +Plot(seq30, "Sojourn") +Plot( + ExtractHistogram(seq30, "Sojourn", 1), + ExtractHistogram(seq30, "Sojourn", 2), + ExtractHistogram(seq30, "Sojourn", 3), + ExtractHistogram(seq30, "Sojourn", 4), +) + +mc30 = Estimate( + seq30, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + MaxOrder=4, + GlobalInitialTransition=False, +) +mc30 = Estimate( + seq30, + "VARIABLE_ORDER_MARKOV", + "Ordinary", 
+ MaxOrder=4, + Algorithm="BIC", + GlobalInitialTransition=False, +) +# todo empty plot +# Plot(mc30, "Sojourn") +Display(Estimate(seq30, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=1)) +Display( + Estimate( + seq30, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + Order=2, + GlobalInitialTransition=False, + ) +) + +seq31 = Cluster(seq30, "Limit", [1, 4]) +mc31 = Estimate( + seq30, + "VARIABLE_ORDER_MARKOV", + "Ordinary", + MaxOrder=4, + GlobalInitialTransition=False, +) +mc31 = Estimate( + seq31, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=2, GlobalInitialTransition=False +) +Plot(mc31, "Sojourn") +Display(Estimate(seq31, "VARIABLE_ORDER_MARKOV", "Ordinary", Order=1)) + +# comparison of sequences by dynamic programming algorithms + +seq32 = Merge(seq20, seq25) +matrix30 = Compare(seq32) +matrix31 = Compare(seq32, VectorDistance("S")) +matrix32 = Compare(seq32, VectorDistance("S"), Transposition=True) +matrix33 = Compare( + seq32, VectorDistance(str(get_shared_data("test_align1.a"))), Transposition=True +) + +Display(Clustering(matrix33, "Partition", 2)) +Clustering(matrix33, "Hierarchy", Algorithm="Agglomerative") +Clustering(matrix33, "Hierarchy", Algorithm="Divisive") + +# multiple alignment + +seq33 = Compare( + SelectIndividual(seq25, [10, 11, 12, 14, 15]), + VectorDistance("S"), + Output="Sequences", + Algorithm="Agglomerative", +) +seq34 = Compare( + SelectIndividual(seq25, [10, 11, 12, 14, 15]), + VectorDistance("S"), + Output="Sequences", + Algorithm="Divisive", +) +seq35 = Compare( + SelectIndividual(seq25, [10, 11, 12, 14, 15]), + VectorDistance("S"), + Output="Sequences", + Algorithm="Ordering", +) + +Compare(seq25, TestSequence=9, RefSequence=1) +Compare(seq25, VectorDistance("S"), TestSequence=9, RefSequence=1) +Compare(seq25, VectorDistance("S"), TestSequence=9, RefSequence=1, Transposition=True) + +# multiple change-point models + +Display(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile") +Display(seq25, 14, 6, "Multinomial", 
ViewPoint="SegmentProfile", Output="ChangePoint") +Plot(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile") +Plot(seq25, 14, 6, "Multinomial", ViewPoint="SegmentProfile", Output="ChangePoint") +# hidden semi-Markov chains + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("belren1.hsc"))) +hsmc20 = Estimate(seq20, "HIDDEN_SEMI-MARKOV", hsmc0) + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("elstar1.hsc"))) +hsmc21 = Estimate(seq21, "HIDDEN_SEMI-MARKOV", hsmc0) + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("fuji1.hsc"))) +hsmc22 = Estimate(seq22, "HIDDEN_SEMI-MARKOV", hsmc0) + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("gala1.hsc"))) +hsmc23 = Estimate(seq23, "HIDDEN_SEMI-MARKOV", hsmc0) + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("granny1.hsc"))) +hsmc24 = Estimate(seq24, "HIDDEN_SEMI-MARKOV", hsmc0) + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("reinet1.hsc"))) +hsmc25 = Estimate(seq25, "HIDDEN_SEMI-MARKOV", hsmc0) + +Display(hsmc25) +Plot(hsmc25, "Intensity", 1) +Plot(hsmc25, "FirstOccurrence", 1) +Plot(hsmc25, "Counting", 1) + +# state +Plot(hsmc25, "Intensity") +Plot(hsmc25, "Sojourn") +# observed +Plot(hsmc25, "Sojourn", 1) + +Plot(hsmc25, 1, ViewPoint="StateProfile") +Plot(hsmc25, 1, ViewPoint="StateProfile", Output="InState") +Plot(hsmc25, 1, ViewPoint="StateProfile", Output="OutState") + +seq25_1 = ExtractData(hsmc25) +Display(seq25_1, ViewPoint="Data", Format="Line") + +hsmc0 = HiddenSemiMarkov(str(get_shared_data("wij1.hsc"))) +hsmc26 = Estimate(seq26, "HIDDEN_SEMI-MARKOV", hsmc0) + +# model comparison + +# Thresholding(hsmc20, MinProbability=0.001) +# Thresholding(hsmc21, MinProbability=0.001) +# Thresholding(hsmc22, MinProbability=0.001) +# Thresholding(hsmc23, MinProbability=0.001) +# Thresholding(hsmc24, MinProbability=0.001) +# Thresholding(hsmc25, MinProbability=0.001) +# Thresholding(hsmc26, MinProbability=0.001) + + +# matrix20 = Compare(Thresholding(hsmc22, MinProbability=0.001), seq22, 10000) + +# matrix20 = 
Compare(Thresholding(hsmc20, MinProbability=0.001), seq20, Thresholding(hsmc21, MinProbability=0.001), seq21, Thresholding(hsmc22, MinProbability=0.001), seq22, Thresholding(hsmc24, MinProbability=0.001), seq24, Thresholding(hsmc25, MinProbability=0.001), seq25, Thresholding(hsmc26, MinProbability=0.001), seq26, 10000) + +# TODO unstable the line above works, the line below does not +# matrix20 = Compare(Thresholding(hsmc20, MinProbability=0.001), seq20, Thresholding(hsmc21, MinProbability=0.001), seq21, Thresholding(hsmc22, MinProbability=0.001), seq22, Thresholding(hsmc23, MinProbability=0.001), seq23, Thresholding(hsmc24, MinProbability=0.001), seq24, Thresholding(hsmc25, MinProbability=0.001), seq25, Thresholding(hsmc26, MinProbability=0.001), seq26, 10000, FileName="ASCII/cultivar1_models.txt") + +# may be slow +# matrix21 = Compare(Thresholding(hsmc20, MinProbability=0.001), Thresholding(hsmc21, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc24, MinProbability=0.001), Thresholding(hsmc25, MinProbability=0.001), Thresholding(hsmc26, MinProbability=0.001), 100, 90) +# matrix21 = Compare(Thresholding(hsmc20, MinProbability=0.001), Thresholding(hsmc21, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc22, MinProbability=0.001), Thresholding(hsmc24, MinProbability=0.001), Thresholding(hsmc25, MinProbability=0.001), Thresholding(hsmc26, MinProbability=0.001), 100, 90, FileName="ASCII/cultivar1_models_90.txt") + + +# Plot(matrix20) diff --git a/test/functional3.py b/test/test_functional3.py similarity index 50% rename from test/functional3.py rename to test/test_functional3.py index 97061ba..91d0f4d 100644 --- a/test/functional3.py +++ b/test/test_functional3.py @@ -19,18 +19,48 @@ # ######################################################################### """ -__revision__ = "$Id$" +__revision__ = "$Id$" -import os -from openalea.sequence_analysis 
import * -from openalea.sequence_analysis.estimate import Estimate -from openalea.sequence_analysis.compare import Compare -from tools import runTestClass, robust_path as get_shared_data -seq69 = Sequences(str(get_shared_data( "pin_laricio_7x.seq"))) +from openalea.sequence_analysis import ( + Cluster, + Clustering, + Compare, + ComputeCorrelation, + ComputeWhiteNoiseCorrelation, + Convolution, + Cumulate, + Difference, + Display, + Distribution, + Estimate, + ExtractData, + ExtractDistribution, + ExtractHistogram, + Fit, + HiddenSemiMarkov, + Merge, + MergeVariable, + Mixture, + MovingAverage, + Plot, + PointwiseAverage, + Regression, + SelectIndividual, + SelectVariable, + Segmentation, + SegmentationExtract, + Sequences, + SojournTimeSequences, + VectorDistance, + Vectors, +) +from .tools import robust_path as get_shared_data + +seq69 = Sequences(str(get_shared_data("pin_laricio_7x.seq"))) seq70 = Cluster(seq69, "Step", 1, 10) -#seq70 = IndexParameterExtract(Cluster(seq69, "Step", 2, 10), 1927, MaxIndex=1992) +# seq70 = IndexParameterExtract(Cluster(seq69, "Step", 2, 10), 1927, MaxIndex=1992) seq2 = SelectVariable(seq70, 1) Plot(seq2, 2, 5, "Gaussian", ViewPoint="SegmentProfile") @@ -42,7 +72,7 @@ Plot(Regression(vec70, "MovingAverage", 1, 2, [1])) Plot(Regression(vec70, "MovingAverage", 1, 3, [1])) -vec71 = Vectors(SelectIndividual(seq70, [1, 2, 3])) +vec71 = Vectors(SelectIndividual(seq70, [1, 2, 3])) Plot(Regression(vec71, "MovingAverage", 1, 2, [1])) Plot(Regression(vec71, "MovingAverage", 1, 3, [1])) @@ -51,11 +81,13 @@ Plot(SelectIndividual(seq71, [0, 4]), ViewPoint="Data") seq72 = PointwiseAverage(SelectIndividual(seq70, [1, 2, 3]), Output="Residual") -seq72 = PointwiseAverage(SelectIndividual(seq70, [1, 2, 3]), Output="StandardizedResidual") +seq72 = PointwiseAverage( + SelectIndividual(seq70, [1, 2, 3]), Output="StandardizedResidual" +) Plot(SelectIndividual(seq72, [1, 2, 3]), ViewPoint="Data") Plot(SelectIndividual(Cumulate(seq72), [1, 2, 3]), 
ViewPoint="Data") -vec73 = Vectors(SelectIndividual(seq70, [4, 5, 6])) +vec73 = Vectors(SelectIndividual(seq70, [4, 5, 6])) Plot(Regression(vec73, "MovingAverage", 1, 2, [1])) Plot(Regression(vec73, "MovingAverage", 1, 3, [1])) @@ -64,13 +96,15 @@ Plot(SelectIndividual(seq73, [3, 7]), ViewPoint="Data") seq74 = PointwiseAverage(SelectIndividual(seq70, [4, 5, 6]), Output="Residual") -seq74 = PointwiseAverage(SelectIndividual(seq70, [4, 5, 6]), Output="StandardizedResidual") +seq74 = PointwiseAverage( + SelectIndividual(seq70, [4, 5, 6]), Output="StandardizedResidual" +) Plot(SelectIndividual(seq74, [4, 5, 6]), ViewPoint="Data") Plot(SelectIndividual(Cumulate(seq74), [4, 5, 6]), ViewPoint="Data") -matrix70 = Compare(seq70, VectorDistance("N", "N"), IndelFactor=1., End="Free") -matrix70 = Compare(seq70, VectorDistance("N", "N"), IndelFactor=1.) +matrix70 = Compare(seq70, VectorDistance("N", "N"), IndelFactor=1.0, End="Free") +matrix70 = Compare(seq70, VectorDistance("N", "N"), IndelFactor=1.0) Display(Clustering(matrix70, "Partition", 3)) Clustering(matrix70, "Hierarchy") @@ -78,30 +112,65 @@ # by symmetric smoothing filters and computation of sample autocorrelation functions from residuals seq75 = Difference(seq70) -acf11 = Merge(ComputeCorrelation(seq75, 1, MaxLag=10), ComputeCorrelation(seq75, 2, MaxLag=10)) +acf11 = Merge( + ComputeCorrelation(seq75, 1, MaxLag=10), ComputeCorrelation(seq75, 2, MaxLag=10) +) ComputeWhiteNoiseCorrelation(acf11, 1) Plot(acf11) # symmetric smoothing filters of half-width 3 filter1 = Convolution(Distribution("B", 0, 6, 0.2), Distribution("B", 0, 6, 0.8)) -filter2 = Convolution(Distribution("B", 0, 4, 0.2), Distribution("B", 0, 4, 0.5), Distribution("B", 0, 4, 0.8)) -filter3 = Convolution(Distribution("U", 0, 2), Distribution("U", 0, 2), Distribution("U", 0, 2), Distribution("U", 0, 2), Distribution("U", 0, 2), Distribution("U", 0, 2)) -filter4 = Convolution(Distribution("U", 0, 3), Distribution("U", 0, 3), Distribution("U", 0, 3), 
Distribution("U", 0, 3)) -filter5 = Convolution(Distribution("U", 0, 4), Distribution("U", 0, 4), Distribution("U", 0, 4)) +filter2 = Convolution( + Distribution("B", 0, 4, 0.2), + Distribution("B", 0, 4, 0.5), + Distribution("B", 0, 4, 0.8), +) +filter3 = Convolution( + Distribution("U", 0, 2), + Distribution("U", 0, 2), + Distribution("U", 0, 2), + Distribution("U", 0, 2), + Distribution("U", 0, 2), + Distribution("U", 0, 2), +) +filter4 = Convolution( + Distribution("U", 0, 3), + Distribution("U", 0, 3), + Distribution("U", 0, 3), + Distribution("U", 0, 3), +) +filter5 = Convolution( + Distribution("U", 0, 4), Distribution("U", 0, 4), Distribution("U", 0, 4) +) filter6 = Convolution(Distribution("U", 0, 6), Distribution("U", 0, 6)) -Plot(filter1, filter2, Distribution("B", 0, 12, 0.5), filter3, filter4, filter5, filter6, Distribution("U", 0, 12)) +Plot( + filter1, + filter2, + Distribution("B", 0, 12, 0.5), + filter3, + filter4, + filter5, + filter6, + Distribution("U", 0, 12), +) seq76 = MovingAverage(seq70, Distribution("B", 0, 16, 0.5), BeginEnd=True) -seq77 = MovingAverage(seq70, Distribution("B", 0, 16, 0.5), BeginEnd=True, Output="Residual") -acf12 = Merge(ComputeCorrelation(seq73, 1, MaxLag=10), ComputeCorrelation(seq73, 2, MaxLag=10)) +seq77 = MovingAverage( + seq70, Distribution("B", 0, 16, 0.5), BeginEnd=True, Output="Residual" +) +acf12 = Merge( + ComputeCorrelation(seq73, 1, MaxLag=10), ComputeCorrelation(seq73, 2, MaxLag=10) +) ComputeWhiteNoiseCorrelation(acf12, Distribution("B", 0, 6, 0.5)) Plot(acf12) seq78 = MovingAverage(seq70, [1, 1, 1], BeginEnd=True) seq79 = MovingAverage(seq70, [1, 1, 1], BeginEnd=True, Output="Residual") -acf13 = Merge(ComputeCorrelation(seq75, 1, MaxLag=10), ComputeCorrelation(seq75, 2, MaxLag=10)) +acf13 = Merge( + ComputeCorrelation(seq75, 1, MaxLag=10), ComputeCorrelation(seq75, 2, MaxLag=10) +) ComputeWhiteNoiseCorrelation(acf13, [1, 1, 1]) Plot(acf13) @@ -125,21 +194,45 @@ # multivariate segmentation -Display(seq70, 
5, 4, "Gaussian", "Gaussian", ViewPoint="SegmentProfile", NbSegmentation=5) +Display( + seq70, 5, 4, "Gaussian", "Gaussian", ViewPoint="SegmentProfile", NbSegmentation=5 +) Plot(seq70, 5, 4, "Gaussian", "Gaussian", ViewPoint="SegmentProfile") -Plot(seq70, 5, 4, "Gaussian", "Gaussian", ViewPoint="SegmentProfile", Output="ChangePoint") +Plot( + seq70, + 5, + 4, + "Gaussian", + "Gaussian", + ViewPoint="SegmentProfile", + Output="ChangePoint", +) # estimation of a hidden semi-Markov chain -hmc60 = HiddenSemiMarkov(str(get_shared_data( "pin_laricio_6.hsc"))) +hmc60 = HiddenSemiMarkov(str(get_shared_data("pin_laricio_6.hsc"))) hmc6 = Estimate(seq70, "HIDDEN_SEMI-MARKOV", hmc60) -hsmc60 = HiddenSemiMarkov(str(get_shared_data( "pin_laricio_6.hsc"))) +hsmc60 = HiddenSemiMarkov(str(get_shared_data("pin_laricio_6.hsc"))) hsmc6 = Estimate(seq70, "HIDDEN_SEMI-MARKOV", hsmc60) hsmc61 = Estimate(seq70, "HIDDEN_SEMI-MARKOV", "Ordinary", 6, "LeftRight") -Plot(ExtractDistribution(hsmc6, "Observation", 1, 0), ExtractDistribution(hsmc6, "Observation", 1, 1), ExtractDistribution(hsmc6, "Observation", 1, 2), ExtractDistribution(hsmc6, "Observation", 1, 3), ExtractDistribution(hsmc6, "Observation", 1, 4), ExtractDistribution(hsmc6, "Observation", 1, 5)) -Plot(ExtractDistribution(hsmc6, "Observation", 2, 0), ExtractDistribution(hsmc6, "Observation", 2, 1), ExtractDistribution(hsmc6, "Observation", 2, 2), ExtractDistribution(hsmc6, "Observation", 2, 3), ExtractDistribution(hsmc6, "Observation", 2, 4), ExtractDistribution(hsmc6, "Observation", 2, 5)) +Plot( + ExtractDistribution(hsmc6, "Observation", 1, 0), + ExtractDistribution(hsmc6, "Observation", 1, 1), + ExtractDistribution(hsmc6, "Observation", 1, 2), + ExtractDistribution(hsmc6, "Observation", 1, 3), + ExtractDistribution(hsmc6, "Observation", 1, 4), + ExtractDistribution(hsmc6, "Observation", 1, 5), +) +Plot( + ExtractDistribution(hsmc6, "Observation", 2, 0), + ExtractDistribution(hsmc6, "Observation", 2, 1), + 
ExtractDistribution(hsmc6, "Observation", 2, 2), + ExtractDistribution(hsmc6, "Observation", 2, 3), + ExtractDistribution(hsmc6, "Observation", 2, 4), + ExtractDistribution(hsmc6, "Observation", 2, 5), +) # 1, 3, 5 Plot(hsmc6, 5, ViewPoint="StateProfile") @@ -151,26 +244,77 @@ Display(seq61, ViewPoint="Data", Format="Line") - - - -mixt61 = Mixture(21. / 406., ExtractDistribution(hsmc6, "Observation", 1, 0), 29. / 406., ExtractDistribution(hsmc6, "Observation", 1, 1), 140. / 406., ExtractDistribution(hsmc6, "Observation", 1, 2), 87. / 406., ExtractDistribution(hsmc6, "Observation", 1, 3), 71. / 406., ExtractDistribution(hsmc6, "Observation", 1, 4), 58. / 406., ExtractDistribution(hsmc6, "Observation", 1, 5)) -mixt61 = Mixture(0.0497296, ExtractDistribution(hsmc6, "Observation", 1, 0), 0.0750034, ExtractDistribution(hsmc6, "Observation", 1, 1), 0.3416, ExtractDistribution(hsmc6, "Observation", 1, 2), 0.207806, ExtractDistribution(hsmc6, "Observation", 1, 3), 0.159426, ExtractDistribution(hsmc6, "Observation", 1, 4), 0.166434, ExtractDistribution(hsmc6, "Observation", 1, 5)) +mixt61 = Mixture( + 21.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 1, 0), + 29.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 1, 1), + 140.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 1, 2), + 87.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 1, 3), + 71.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 1, 4), + 58.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 1, 5), +) +mixt61 = Mixture( + 0.0497296, + ExtractDistribution(hsmc6, "Observation", 1, 0), + 0.0750034, + ExtractDistribution(hsmc6, "Observation", 1, 1), + 0.3416, + ExtractDistribution(hsmc6, "Observation", 1, 2), + 0.207806, + ExtractDistribution(hsmc6, "Observation", 1, 3), + 0.159426, + ExtractDistribution(hsmc6, "Observation", 1, 4), + 0.166434, + ExtractDistribution(hsmc6, "Observation", 1, 5), +) Plot(Fit(ExtractHistogram(seq70, "Value", 1), ExtractDistribution(mixt61, 
"Mixture"))) -mixt62 = Mixture(21. / 406., ExtractDistribution(hsmc6, "Observation", 2, 0), 29. / 406., ExtractDistribution(hsmc6, "Observation", 2, 1), 140. / 406., ExtractDistribution(hsmc6, "Observation", 2, 2), 87. / 406., ExtractDistribution(hsmc6, "Observation", 2, 3), 71. / 406., ExtractDistribution(hsmc6, "Observation", 2, 4), 58. / 406., ExtractDistribution(hsmc6, "Observation", 2, 5)) -mixt62 = Mixture(0.0497296, ExtractDistribution(hsmc6, "Observation", 2, 0), 0.0750034, ExtractDistribution(hsmc6, "Observation", 2, 1), 0.3416, ExtractDistribution(hsmc6, "Observation", 2, 2), 0.207806, ExtractDistribution(hsmc6, "Observation", 2, 3), 0.159426, ExtractDistribution(hsmc6, "Observation", 2, 4), 0.166434, ExtractDistribution(hsmc6, "Observation", 2, 5)) +mixt62 = Mixture( + 21.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 2, 0), + 29.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 2, 1), + 140.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 2, 2), + 87.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 2, 3), + 71.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 2, 4), + 58.0 / 406.0, + ExtractDistribution(hsmc6, "Observation", 2, 5), +) +mixt62 = Mixture( + 0.0497296, + ExtractDistribution(hsmc6, "Observation", 2, 0), + 0.0750034, + ExtractDistribution(hsmc6, "Observation", 2, 1), + 0.3416, + ExtractDistribution(hsmc6, "Observation", 2, 2), + 0.207806, + ExtractDistribution(hsmc6, "Observation", 2, 3), + 0.159426, + ExtractDistribution(hsmc6, "Observation", 2, 4), + 0.166434, + ExtractDistribution(hsmc6, "Observation", 2, 5), +) Plot(Fit(ExtractHistogram(seq70, "Value", 2), ExtractDistribution(mixt62, "Mixture"))) # comparason with the segmentations deduced from the 6-state hidden semi-Markov chain -seq46 = Merge( Segmentation(seq70, 1, [1935, 1961, 1972, 1990], "Gaussian", "Gaussian"), -Segmentation(seq70, 2, [1932, 1936, 1961, 1984, 1986], "Gaussian", "Gaussian"), -Segmentation(seq70, 3, [1932, 1949, 1971, 1985, 1990], 
"Gaussian", "Gaussian"), -Segmentation(seq70, 4, [1930, 1953, 1963, 1977], "Gaussian", "Gaussian"), -Segmentation(seq70, 5, [1931, 1963, 1975, 1991], "Gaussian", "Gaussian"), -Segmentation(seq70, 6, [1931, 1943, 1960, 1976], "Gaussian", "Gaussian")) +seq46 = Merge( + Segmentation(seq70, 1, [1935, 1961, 1972, 1990], "Gaussian", "Gaussian"), + Segmentation(seq70, 2, [1932, 1936, 1961, 1984, 1986], "Gaussian", "Gaussian"), + Segmentation(seq70, 3, [1932, 1949, 1971, 1985, 1990], "Gaussian", "Gaussian"), + Segmentation(seq70, 4, [1930, 1953, 1963, 1977], "Gaussian", "Gaussian"), + Segmentation(seq70, 5, [1931, 1963, 1975, 1991], "Gaussian", "Gaussian"), + Segmentation(seq70, 6, [1931, 1943, 1960, 1976], "Gaussian", "Gaussian"), +) seq47 = SelectVariable(seq46, [3, 5]) Plot(seq47, ViewPoint="Data") @@ -179,7 +323,7 @@ seq49 = SelectVariable(seq48, [3]) # these two lines works together # seq47 = SelectVariable(seq46, [3]) -#Plot(Merge(SelectIndividual(seq47, [1]), SelectIndividual(seq49, [1])), ViewPoint=Data) +# Plot(Merge(SelectIndividual(seq47, [1]), SelectIndividual(seq49, [1])), ViewPoint=Data) # analyse des residus @@ -191,7 +335,14 @@ acf50 = ComputeCorrelation(seq50, 1, MaxLag=10) Plot(acf50) -seq51 = Merge(SegmentationExtract(seq50, 1, 0), SegmentationExtract(seq50, 1, 1), SegmentationExtract(seq50, 1, 2), SegmentationExtract(seq50, 1, 3), SegmentationExtract(seq50, 1, 4), SegmentationExtract(seq50, 1, 5)) +seq51 = Merge( + SegmentationExtract(seq50, 1, 0), + SegmentationExtract(seq50, 1, 1), + SegmentationExtract(seq50, 1, 2), + SegmentationExtract(seq50, 1, 3), + SegmentationExtract(seq50, 1, 4), + SegmentationExtract(seq50, 1, 5), +) acf51 = ComputeCorrelation(seq51, MaxLag=10) Plot(acf51) @@ -206,4 +357,3 @@ seq57 = Segmentation(seq80, [5, 5, 5, 4, 4, 4], "Mean") seq58 = Segmentation(seq80, [5, 5, 5, 4, 4, 4], "Gaussian") Display(MergeVariable(SelectVariable(seq57, 1), seq58), ViewPoint="Data", Format="Line") - diff --git a/test/test_hidden_semi_markov.py 
b/test/test_hidden_semi_markov.py index 69c3a4e..7b01ae5 100644 --- a/test/test_hidden_semi_markov.py +++ b/test/test_hidden_semi_markov.py @@ -21,8 +21,8 @@ import openalea.stat_tool.plot #import DISABLE_PLOT openalea.stat_tool.plot.DISABLE_PLOT = True -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +from .tools import interface +from .tools import runTestClass, robust_path as get_shared_data import os diff --git a/test/test_hidden_semi_markov_functional.py b/test/test_hidden_semi_markov_functional.py index 80b2897..2d9d293 100644 --- a/test/test_hidden_semi_markov_functional.py +++ b/test/test_hidden_semi_markov_functional.py @@ -18,8 +18,8 @@ DISABLE_PLOT = False # DISABLE_PLOT = True -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +from .tools import interface +from .tools import runTestClass, robust_path as get_shared_data import os diff --git a/test/test_hidden_variable_order_markov.py b/test/test_hidden_variable_order_markov.py index b432c5b..1acea84 100644 --- a/test/test_hidden_variable_order_markov.py +++ b/test/test_hidden_variable_order_markov.py @@ -14,10 +14,10 @@ from openalea.stat_tool.cluster import Cluster from openalea.stat_tool.cluster import Transcode, Cluster -from tools import interface -from tools import runTestClass +from .tools import interface +from .tools import runTestClass -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data class Test(interface): diff --git a/test/test_iterator.py b/test/test_iterator.py index 944ac98..0dab9e7 100644 --- a/test/test_iterator.py +++ b/test/test_iterator.py @@ -1,80 +1,91 @@ -""" Test renewal data structure +"""Test renewal data structure .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ + __revision__ = "$Id$" +import pytest + from openalea.sequence_analysis import _sequence_analysis as sa -from openalea.sequence_analysis.hidden_variable_order_markov import * -from openalea.sequence_analysis.hidden_semi_markov import * -from openalea.sequence_analysis.renewal import * -from tools import runTestClass, robust_path as get_shared_data +from openalea.sequence_analysis.hidden_variable_order_markov import ( + HiddenVariableOrderMarkov, +) +from openalea.sequence_analysis.hidden_semi_markov import HiddenSemiMarkov +from openalea.sequence_analysis.renewal import Renewal +from .tools import runTestClass, robust_path as get_shared_data N = 10 -import os + + +@pytest.fixture +def create_data_hidden_semi_markov(): + return HiddenSemiMarkov(str(get_shared_data("test_hidden_semi_markov.dat"))) + + # SEMI MARKOV case -def test_semi_markov_iterator(): - hsm = HiddenSemiMarkov(str(get_shared_data('test_hidden_semi_markov.dat'))) +def test_semi_markov_iterator(create_data_hidden_semi_markov): + hsm = create_data_hidden_semi_markov smi = sa._SemiMarkovIterator(hsm) sim = smi.simulation(N, True) + def hsm_iterator(fn): hsm = HiddenSemiMarkov(fn) it = sa._SemiMarkovIterator(hsm) return it -def test_semi_markov_iterator2(): - fn = str(get_shared_data('test_hidden_semi_markov.dat')) + +def test_semi_markov_iterator2(create_data_hidden_semi_markov): + fn = create_data_hidden_semi_markov smi = hsm_iterator(fn) sim = smi.simulation(N, True) + # VARIABLE ORDER MARKOV case -def vom_iterator(fn): + +@pytest.fixture +def create_data_variable_order_markov(): + return str(get_shared_data("dupreziana21.hc")) + + +def vom_iterator(fn): vom = HiddenVariableOrderMarkov(fn) it = sa._VariableOrderMarkovIterator(vom) return it -def test_variable_order_markov_iterator(): - vom = HiddenVariableOrderMarkov(str(get_shared_data('dupreziana21.hc'))) + +def test_variable_order_markov_iterator(create_data_variable_order_markov): + vom = 
HiddenVariableOrderMarkov(create_data_variable_order_markov) smi = sa._VariableOrderMarkovIterator(vom) sim = smi.simulation(N, True) -def test_variable_order_markov_iterator2(): - fn = str(get_shared_data('dupreziana21.hc')) + +def test_variable_order_markov_iterator2(create_data_variable_order_markov): + fn = create_data_variable_order_markov smi = vom_iterator(fn) sim = smi.simulation(N, True) + # RENEWAL case def renewal_iterator(fn): ren = Renewal(fn) it = sa._RenewalIterator(ren) return it + def _test_renewal_iterator2(): """to be fixed""" fn = path + "abri13.ren" smi = renewal_iterator(fn) sim = smi.simulation(N, True) + def _test_renewal_iterator(): """to be fixed""" ren = Renewal(path + "abri13.ren") print((type(ren))) smi = sa._RenewalIterator(ren) sim = smi.simulation(N, True) - - - -if __name__ == "__main__": - - test_semi_markov_iterator() - test_semi_markov_iterator2() - - test_variable_order_markov_iterator() - test_variable_order_markov_iterator2() - - #test_renewal_iterator() - #test_renewal_iterator2() - diff --git a/test/test_merge.py b/test/test_merge.py index 4196f7f..fce8256 100644 --- a/test/test_merge.py +++ b/test/test_merge.py @@ -1,16 +1,17 @@ """unitary or functional tests on Merge. - See also test_semi_markov, test_time_events and so on - - .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr +See also test_semi_markov, test_time_events and so on + +.. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ + __revision__ = "$Id$" -from openalea.stat_tool.data_transform import Merge +from openalea.stat_tool.data_transform import Merge -from test_correlation import CorrelationData -from test_tops import TopsData -from test_semi_markov import SemiMarkovData +from .test_correlation import CorrelationData +from .test_tops import TopsData +from .test_semi_markov import SemiMarkovData def test_merge_histo(): @@ -29,11 +30,12 @@ def test_merge_renewal_data(): def test_merge_sequences(): - from test_sequences import Test as sequences_data + from .test_sequences import Test as sequences_data + sequences = sequences_data() data = sequences.build_data() assert Merge(data, data) - + def test_merge_vom_data(): """test not yet implemented""" @@ -44,9 +46,9 @@ def _test_merge_semi_markov_data(): sm1 = SemiMarkovData() sm2 = SemiMarkovData() sm = Merge(sm1, sm2) - - #todo this plot does not work right now ? - #sm.plot() + + # todo this plot does not work right now ? 
+ # sm.plot() def test_merge_nonhomogenesous_markov_data(): @@ -54,25 +56,26 @@ def test_merge_nonhomogenesous_markov_data(): pass -def test_merge_tops(): - t1 = TopsData() - t2 = TopsData() - t = Merge(t1, t2) - t.plot() - +## def test_merge_tops(): +## t1 = TopsData() +## t2 = TopsData() +## t = Merge(t1, t2) +## t.plot() + + def test_merge_correlation(): c1 = CorrelationData(1) c2 = CorrelationData(2) c3 = CorrelationData(3) - c = Merge(c1,c2,c3) - c_bis = c1.merge([c2,c3]) - assert str(c)==str(c_bis) + c = Merge(c1, c2, c3) + c_bis = c1.merge([c2, c3]) + assert str(c) == str(c_bis) c.plot() -if __name__ == "__main__": - test_merge_tops() - test_merge_correlation() - #test_merge_semi_markov_data() - test_merge_sequences() - +## if __name__ == "__main__": +## test_merge_tops() +## test_merge_correlation() +## #test_merge_semi_markov_data() +## test_merge_sequences() +## diff --git a/test/test_moving_average.py b/test/test_moving_average.py index 9546c42..37e5086 100644 --- a/test/test_moving_average.py +++ b/test/test_moving_average.py @@ -1,36 +1,38 @@ -""" Test moving average +"""Test moving average .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr .. 
todo:: to be done """ + __revision__ = "$Id$" -from openalea.sequence_analysis import * -from openalea.stat_tool.distribution import Distribution -from tools import runTestClass +import pytest + +from openalea.sequence_analysis import Cluster, Distribution, MovingAverage, Sequences -seq = Sequences(get_shared_data("pin_laricio_7x.seq")) -seq70 = Cluster(seq, "Step", 1, 10) +from .tools import robust_path as get_shared_data -class Test(): - def __init__(self): - self.data = seq70 +@pytest.fixture +def create_data_moving_average(): + seq = Sequences(get_shared_data("pin_laricio_7x.seq")) + return Cluster(seq, "Step", 1, 10) - def test_distribution(self): - seq70 = self.data + +class Test: + def test_distribution(self, create_data_moving_average): + seq70 = create_data_moving_average MovingAverage(seq70, Distribution("B", 0, 16, 0.5), BeginEnd=True) - MovingAverage(seq70, Distribution("B", 0, 16, 0.5), BeginEnd=True, Output="Residual") + MovingAverage( + seq70, Distribution("B", 0, 16, 0.5), BeginEnd=True, Output="Residual" + ) - def test_frequencies(self): - seq70 = self.data + def test_frequencies(self, create_data_moving_average): + seq70 = create_data_moving_average MovingAverage(seq70, [1, 1, 1], BeginEnd=True) MovingAverage(seq70, [1, 1, 1], BeginEnd=True, Output="Residual") def test_filter(self): """test not yet implemented""" pass - -if __name__ == "__main__": - runTestClass(Test()) diff --git a/test/test_nonhomogeneous.py b/test/test_nonhomogeneous.py index 06da8ae..efdc85e 100644 --- a/test/test_nonhomogeneous.py +++ b/test/test_nonhomogeneous.py @@ -2,40 +2,45 @@ .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ + __revision__ = "$Id: test_semi_markov.py 8204 2010-02-19 10:27:45Z cokelaer $" +import pytest -#from openalea.stat_tool import _stat_tool -#from openalea.sequence_analysis import _sequence_analysis +# from openalea.stat_tool import _stat_tool +# from openalea.sequence_analysis import _sequence_analysis from openalea.sequence_analysis.nonhomogeneous_markov import NonhomogeneousMarkov -from openalea.sequence_analysis import get_shared_data -#from openalea.sequence_analysis.simulate import Simulate -#from openalea.sequence_analysis.sequences import Sequences -#from openalea.stat_tool.data_transform import * -#from openalea.stat_tool.cluster import Cluster -#from openalea.stat_tool.cluster import Transcode, Cluster -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +# from openalea.sequence_analysis.simulate import Simulate +# from openalea.sequence_analysis.sequences import Sequences +# from openalea.stat_tool.data_transform import * +# from openalea.stat_tool.cluster import Cluster +# from openalea.stat_tool.cluster import Transcode, Cluster + +from .tools import interface +from .tools import robust_path as get_shared_data +@pytest.fixture def NonhomogeneousMarkovData(): - seq = Sequences(str(get_shared_data('vanille_m.seq'))) + seq = Sequences(str(get_shared_data("vanille_m.seq"))) mc_m = Estimate(seq_m, "NONHOMOGENEOUS_MARKOV", "MONOMOLECULAR", "VOID") return mc_m + class Test(interface): - """a simple unittest class for nonhomogeneous data + """a simple unittest class for nonhomogeneous data""" - """ def __init__(self): - interface.__init__(self, - self.build_data(), - str(get_shared_data("test_nonhomogeneous.dat")), - NonhomogeneousMarkov) + interface.__init__( + self, + self.build_data(), + str(get_shared_data("test_nonhomogeneous.dat")), + NonhomogeneousMarkov, + ) def build_data(self): - sm = NonhomogeneousMarkov(str(get_shared_data('test_nonhomogeneous.dat'))) + 
sm = NonhomogeneousMarkov(str(get_shared_data("test_nonhomogeneous.dat"))) return sm def test_empty(self): @@ -78,7 +83,7 @@ def _test_simulate(self): def test_extract(self): pass - #self.data.extract(0,1) + # self.data.extract(0,1) def test_extract_data(self): pass diff --git a/test/test_renewal.py b/test/test_renewal.py index 90fe73f..ce93ac4 100644 --- a/test/test_renewal.py +++ b/test/test_renewal.py @@ -1,11 +1,13 @@ -""" Test renewal data structure +"""Test renewal data structure .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr .. todo:: to be done """ + __revision__ = "$Id$" +import pytest from openalea.stat_tool import _stat_tool from openalea.sequence_analysis import _sequence_analysis @@ -16,69 +18,102 @@ from openalea.sequence_analysis.data_transform import TimeScaling from openalea.sequence_analysis import get_shared_data from openalea.stat_tool.cluster import Cluster -from openalea.stat_tool.cluster import Transcode, Cluster -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +from .tools import interface +from .tools import runTestClass, robust_path as get_shared_data + +@pytest.fixture +def build_data_renewal(): + """todo: check identifier output. should be a list""" + # build a list of 2 sequences with a variable that should be identical + # to sequences1.seq + return TimeEvents(str(get_shared_data("test_time_events.dat"))) class Test(interface): - """to be done + """to be done""" - """ def __init__(self): - interface.__init__(self, - self.build_data(), - str(get_shared_data("test_time_events.dat")), - Renewal) + interface.__init__( + self, + self.build_data(), + str(get_shared_data("test_time_events.dat")), + Renewal, + ) def build_data(self): - """todo: check identifier output. should be a list """ + """todo: check identifier output. 
should be a list""" # build a list of 2 sequences with a variable that should be identical # to sequences1.seq - return TimeEvents(str(get_shared_data('test_time_events.dat'))) + return TimeEvents(str(get_shared_data("test_time_events.dat"))) def test_constructor_negative_binomial(self): proba = 0.5 inf_bound = 0 - param = 1. - Renewal("NEGATIVE_BINOMIAL", inf_bound, param, - proba, Type="Equilibrium", - ObservationTime=40) + param = 1.0 + Renewal( + "NEGATIVE_BINOMIAL", + inf_bound, + param, + proba, + Type="Equilibrium", + ObservationTime=40, + ) def test_constructor_binomial(self): inf_bound = 0 sup_bound = 10 - probability = 1. - Renewal("BINOMIAL", inf_bound, sup_bound, - probability, Type="Equilibrium", - ObservationTime=40) + probability = 1.0 + Renewal( + "BINOMIAL", + inf_bound, + sup_bound, + probability, + Type="Equilibrium", + ObservationTime=40, + ) def test_constructor_poisson(self): inf_bound = 0 probability = 0.5 - param = 1. - Renewal("POISSON", inf_bound, param, - probability, Type="Equilibrium", - ObservationTime=40) + param = 1.0 + Renewal( + "POISSON", + inf_bound, + param, + probability, + Type="Equilibrium", + ObservationTime=40, + ) def test_constructor_scale(self): inf_bound = 0 probability = 0.5 - param = 1. - Renewal("POISSON", inf_bound, param, - probability, Type="Equilibrium", - ObservationTime=40, Scale=0.5) + param = 1.0 + Renewal( + "POISSON", + inf_bound, + param, + probability, + Type="Equilibrium", + ObservationTime=40, + Scale=0.5, + ) def test_constructor_not_implemented(self): try: inf_bound = 0 probability = 0.5 - param = 1. 
- Renewal("NOT_IMPLEMENTED", inf_bound, param, - probability, Type="Equilibrium", - ObservationTime=40) + param = 1.0 + Renewal( + "NOT_IMPLEMENTED", + inf_bound, + param, + probability, + Type="Equilibrium", + ObservationTime=40, + ) assert False except: @@ -89,23 +124,32 @@ def test_constructor_from_model(self): from openalea.stat_tool.mixture import Mixture from openalea.stat_tool.convolution import Convolution from openalea.stat_tool.distribution import Binomial - Renewal(Compound(Binomial(0,10,0.5), Binomial(0,10,0.3)), - Type="Equilibrium", ObservationTime=20) - Renewal(Mixture(0.1, Binomial(0,10,0.5), 0.9, Binomial(0,10,0.3)), - Type="Equilibrium", ObservationTime=20) - Renewal(Compound(Binomial(0,10,0.5), Binomial(0,10,0.3)), - Type="Equilibrium", ObservationTime=20) + + Renewal( + Compound(Binomial(0, 10, 0.5), Binomial(0, 10, 0.3)), + Type="Equilibrium", + ObservationTime=20, + ) + Renewal( + Mixture(0.1, Binomial(0, 10, 0.5), 0.9, Binomial(0, 10, 0.3)), + Type="Equilibrium", + ObservationTime=20, + ) + Renewal( + Compound(Binomial(0, 10, 0.5), Binomial(0, 10, 0.3)), + Type="Equilibrium", + ObservationTime=20, + ) def test_constructor_not_implemented2(self): try: Renewal(2, 1) - print('here') + print("here") assert False except: assert True - def _test_empty(self): self.empty() @@ -121,7 +165,7 @@ def test_print(self): def test_display(self): self.display() self.display_versus_ascii_write() - #self.display_versus_str() + # self.display_versus_str() def test_len(self): seq = self.data @@ -146,7 +190,7 @@ def test_spreadsheet_write(self): self.spreadsheet_write() def _test_simulate(self): - #self.simulate() + # self.simulate() pass def test_extract(self): @@ -161,7 +205,6 @@ def test_get_htime(self): data = self.data histo = data.get_htime() - def test_get_hnb_event(self): data = self.data histo = data.get_hnb_event(20) @@ -184,17 +227,15 @@ def test_time_scaling(self): mod = TimeScaling(self.data, 2) assert str(aml) == str(mod) - def test_merge(self): 
time1 = self.data time2 = self.data - assert str(Merge(time1,time2)) == str(time1.merge([time2])) + assert str(Merge(time1, time2)) == str(time1.merge([time2])) def test_time_select(self): - #max value must be greater than the offset. - data= self.data - data.time_select(3,35) - + # max value must be greater than the offset. + data = self.data + data.time_select(3, 35) if __name__ == "__main__": diff --git a/test/test_renewal_functional.py b/test/test_renewal_functional.py index ae2d310..069e126 100644 --- a/test/test_renewal_functional.py +++ b/test/test_renewal_functional.py @@ -15,7 +15,7 @@ """ from openalea.sequence_analysis import * -from tools import runTestClass, robust_path as get_shared_data +from .tools import runTestClass, robust_path as get_shared_data def test1(): diff --git a/test/test_semi_markov.py b/test/test_semi_markov.py index bacf125..6a96c48 100644 --- a/test/test_semi_markov.py +++ b/test/test_semi_markov.py @@ -2,98 +2,83 @@ .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ + __revision__ = "$Id$" +import pytest -from openalea.stat_tool import _stat_tool -from openalea.sequence_analysis import _sequence_analysis -from openalea.sequence_analysis import * +from openalea.sequence_analysis import SemiMarkov, Simulate -from openalea.stat_tool.data_transform import * -from openalea.stat_tool.cluster import Cluster -from openalea.stat_tool.cluster import Transcode, Cluster -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +from .tools import interface +from .tools import robust_path as get_shared_data -def SemiMarkovData(): - sm = SemiMarkov(str(get_shared_data('test_semi_markov.dat'))) - ret = Simulate(sm, 1, 1000, True) - return sm +@pytest.fixture +def interface_instance(): + filename = str(get_shared_data("test_semi_markov.dat")) + return interface( + data=SemiMarkov(filename), + filename=filename, + Structure=SemiMarkov, + ) -class Test(interface): - """a simple unittest class +class 
TestSemiMarkov: + """a simple unittest class""" - """ - def __init__(self): - interface.__init__(self, - self.build_data(), - str(get_shared_data("test_semi_markov.dat")), - SemiMarkov) + def test_constructor_from_file(self, interface_instance): + interface_instance.constructor_from_file() def build_data(self): - """todo: check identifier output. should be a list """ + """todo: check identifier output. should be a list""" # build a list of 2 sequences with a variable that should be identical # to sequences1.seq - sm = SemiMarkov(str(get_shared_data('test_semi_markov.dat'))) - + return SemiMarkov(str(get_shared_data("test_semi_markov.dat"))) - return sm - - def _test_empty(self): - self.empty() + def test_simulate(self, interface_instance): + Simulate(interface_instance.data, 1, 1000, True) + pass - def test_constructor_from_file(self): - self.constructor_from_file() + def test_empty(self, interface_instance): + interface_instance.empty() - def test_constructor_from_file_failure(self): - self.constructor_from_file_failure() + def test_constructor_from_file_failure(self, interface_instance): + interface_instance.constructor_from_file_failure() - def test_print(self): - self.print_data() + def test_print(self, interface_instance): + interface_instance.print_data() - def test_display(self): - self.display() - self.display_versus_ascii_write() - self.display_versus_str() + def test_display(self, interface_instance): + interface_instance.display() + interface_instance.display_versus_ascii_write() + interface_instance.display_versus_str() - def test_len(self): - seq = self.data + def test_len(self, interface_instance): + seq = interface_instance.data pass - def test_plot(self): - self.plot() + def test_plot(self, interface_instance): + interface_instance.plot() - def test_save(self): - self.save(skip_reading=True) + def test_save(self, interface_instance): + interface_instance.save(skip_reading=True) - def test_plot_write(self): - self.plot_write() + def 
test_plot_write(self, interface_instance): + interface_instance.plot_write() - def test_file_ascii_write(self): - self.file_ascii_write() + def test_file_ascii_write(self, interface_instance): + interface_instance.file_ascii_write() - def test_spreadsheet_write(self): - self.spreadsheet_write() + def test_spreadsheet_write(self, interface_instance): + interface_instance.spreadsheet_write() - def test_simulate(self): - sm = self.data - sm.simulation_nb_elements(1, 10000, True) - Simulate(sm,1, 10000, True) - pass - - def test_thresholding(self): - self.data.thresholding(1) + def test_thresholding(self, interface_instance): + interface_instance.data.thresholding(1) - def test_extract(self): + def test_extract(self, interface_instance): pass - #self.data.extract(0,1,1) - - def test_extract_data(self): - self.data.extract_data() - + # interface_instance.data.extract(0,1,1) -if __name__ == "__main__": - runTestClass(Test()) + def test_extract_data(self, interface_instance): + interface_instance.data.extract_data() diff --git a/test/test_semi_markov_switching_lm_functional.py b/test/test_semi_markov_switching_lm_functional.py index acf109a..08dc596 100644 --- a/test/test_semi_markov_switching_lm_functional.py +++ b/test/test_semi_markov_switching_lm_functional.py @@ -2,6 +2,7 @@ """tests on mv_mixture""" __version__ = "$Id$" +import openalea.sequence_analysis as sa from openalea.stat_tool import _stat_tool from openalea.sequence_analysis import _sequence_analysis from openalea.sequence_analysis.hidden_semi_markov import HiddenSemiMarkov @@ -12,6 +13,7 @@ from openalea.stat_tool.data_transform import * from openalea.stat_tool.cluster import Cluster from openalea.stat_tool.cluster import Transcode, Cluster +from pathlib import Path import openalea.stat_tool.plot #import DISABLE_PLOT # openalea.stat_tool.plot.DISABLE_PLOT = True @@ -29,16 +31,13 @@ def test1(): - from pathlib import Path - from openalea.sequence_analysis import _MarkovianSequences + _MarkovianSequences = 
sa._MarkovianSequences - data_path = Path(openalea.sequence_analysis.__path__[0]) - data_path = str(Path.joinpath(data_path.parent.parent.parent.absolute(), "share","data")) - model_file = "switching_lmm_irred.hsc" + model_file = sa.get_shared_data("switching_lmm_irred.hsc") - hsm = HiddenSemiMarkov(data_path + os.sep + model_file) + hsm = HiddenSemiMarkov(str(model_file)) print(hsm.display()) - from openalea.sequence_analysis import Simulate + Simulate = sa.Simulate nb_seq = 30 seq_length = 100 set_seed(0) @@ -51,8 +50,7 @@ def test1(): from openalea.sequence_analysis import Estimate hsmd = hsm.simulation_nb_sequences(nb_seq, seq_length, True) seq_estim = hsmd.select_variable([2], True) - from openalea.stat_tool import NegativeBinomial - d = NegativeBinomial(1, 10, 0.5) + d = sa.NegativeBinomial(1, 10, 0.5) index = [] for u in range(nb_seq): indexut = [] diff --git a/test/test_sequences.py b/test/test_sequences.py index bb49bdb..f25497d 100644 --- a/test/test_sequences.py +++ b/test/test_sequences.py @@ -3,478 +3,547 @@ .. 
author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr """ + __revision__ = "$Id$" from openalea.stat_tool import _stat_tool +import pytest from openalea.sequence_analysis import _sequence_analysis from openalea.sequence_analysis.sequences import Sequences, Split, SaveMTG -from openalea.sequence_analysis.data_transform import Cumulate, Difference, \ - ExtractVectors, IndexParameterExtract, IndexParameterSelect, RecurrenceTimeSequences +from openalea.sequence_analysis.data_transform import ( + Cumulate, + Difference, + ExtractVectors, + IndexParameterExtract, + IndexParameterSelect, + RecurrenceTimeSequences, +) from openalea.stat_tool.data_transform import * from openalea.stat_tool.cluster import Cluster from openalea.stat_tool.cluster import Transcode, Cluster -from tools import interface -from tools import runTestClass +from .tools import interface +from .tools import runTestClass from openalea.sequence_analysis.sequences import Sequences, IndexParameterType -from tools import robust_path as get_shared_data - -class Test(interface): - - def __init__(self): - interface.__init__(self, - self.build_data(), - str(get_shared_data("sequences1.seq")), - Sequences) - self.seqn = self.build_seqn() - self.seqrealn = self.build_seq_realn() - self.seq1 = self.build_seq1() - - def build_data(self): - """todo: check identifier output. 
should be a list """ - # build a list of 2 sequences with a variable that should be identical - # to sequences1.seq - data = Sequences([ - [1, 0, 0, 0, 1, 1, 2, 0, 2, 2, 2, 1, 1, 0, 1, 0, - 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 1], - [0, 0, 0, 1, 1, 0, 2, 0, 2, 2 ,2 ,1 ,1 ,1 ,1 ,0 ,1 - ,0 ,0 ,0 ,0 ,0]]) - assert data - - assert data.nb_sequence == 2 - assert data.nb_variable == 1 - assert data.cumul_length == 52 - assert data.max_length == 30 - - assert [0, 1] == data.get_identifiers() - - return data - - def build_seqn(self): - s = Sequences([[[1,1,1],[12,12,12]],[[2,2,2],[22,23,24]]]) - return s - - def build_seq1(self): - s = Sequences([[1,1,1],[2,2,2]]) - return s - - def build_seq_realn(self): - s = Sequences([[[1.5,1.5,1.5],[12.5,12.5,12.5]],[[2.5,2.5,2.5],[22.5,23.5,24.5]]]) - return s - - def build_seq_wrong_identifiers(self): - try: - s = Sequences([[1,1,1],[2,2,2]], Identifiers=[-1,1]) - assert False - except: - assert True - - def _test_empty(self): - self.empty() - - def test_constructor_from_file(self): - self.constructor_from_file() - - def test_constructor_from_file_failure(self): - self.constructor_from_file_failure() - - def test_print(self): - self.print_data() - - def test_display(self): - self.display() - self.display_versus_ascii_write() - self.display_versus_str() - - def test_len(self): - seq = self.data - assert len(seq) == 2 - assert len(seq) == seq.nb_sequence - - def test_plot(self): - self.plot() - - def test_save(self): - self.save(skip_reading=True) - - def test_plot_write(self): - self.plot_write() - - def test_file_ascii_write(self): - self.file_ascii_write() - - def test_spreadsheet_write(self): - self.spreadsheet_write() - - def test_extract(self): - #todo - seqn = self.seqn - assert seqn.extract_value(1) - assert seqn.extract_value(2) - - def test_extract_data(self): - pass - - def test_index_parameter_type(self): - - seq1 = Sequences([[1.,1,1],[2.,2,2.]]) - assert IndexParameterType(seq1)=='IMPLICIT_TYPE' - seq1 = 
Sequences([[1.,1,1],[2.,2,2.]], IndexParameterType="TIME") - assert IndexParameterType(seq1)=='TIME' - seq1 = Sequences([[1.,1,1],[2.,2,2.]],IndexParameterType="POSITION" ) - assert IndexParameterType(seq1)=='POSITION' - - - def test_constructors(self): - # heterogeneous or homogeneous type - seq1 = Sequences([1, 2, 3, 4]) - seq1 = Sequences([1, 2, 3, 4.]) - assert seq1.nb_sequence == 1 - assert seq1.nb_variable == 1 - - # single sequence multivariate - seq2 = Sequences([[1,2],[3,4], [5,6]]) - assert seq2.nb_sequence==1 - assert seq2.nb_variable==2 - #ambiguous case (length>5) - seq2 = Sequences([[1,2,3,4,5,6],[3,4,3,4,5,6], [5,6,4,5,6,7]]) - assert seq2.nb_sequence==3 - assert seq2.nb_variable==1 - - - # univariates sequences - seq3 = Sequences([[1,2],[3,4], [5,6,7]]) - assert seq3.nb_sequence==3 - assert seq3.nb_variable==1 - - # general case - seq4 = Sequences([ [[1,2],[3,4]], [[21,22],[23,24]], [[31,32],[33,34], [35,36] ]]) - assert seq4.nb_sequence==3 - assert seq4.nb_variable==2 - - seq4 = Sequences([ [[1,2],[3,4]], [[21,22],[23,24]], [[31,32],[33,34], [35,36] ]], - VertexIdentifiers=[[1,2],[3,4],[5,6,7]], Identifiers=[1,2,3]) - - seq4 = Sequences([ [[1,2],[3,4]], [[21,22],[23,24]], [[31,32],[33,34], [35,36] ]], - IndexParameterType="POSITION", IndexParameter=[[0,1,10], [2,3,11], [4,5,6,12]]) - - seq4 = Sequences([ [[1,2],[3,4]], [[21,22],[23,24]], [[31,32],[33,34], [35,36] ]], - IndexParameterType="TIME", IndexParameter=[[0,1], [2,3], [4,5,6]]) - - - - - def test_constructor_one_sequence(self): - # two sequences with 2 variables (int) - s = Sequences([[1,1,1],[2,2,2]]) - assert s - # two sequences with 2 variables (real) - s = Sequences([[1.,1.,1.],[2.,2.,2.]]) - assert s - # two sequences with 2 variables mix of (real and int) - # works because the first one is float so all others are assume to be float as well - s = Sequences([[1.,1.,1.],[2.,2.,2]]) - assert s - # here it fails because the first number is int but others may be float - try: - s = 
Sequences([[1,1.,1.],[2.,2.,2]]) - assert False - except: - assert True - - def test_constructor_two_sequences(self): - # two sequences with 2 variables (int) - s = Sequences([[[1,1,1],[12,12,12]],[[2,2,2],[22,23,24]]]) - assert s - s = Sequences([[[1.,1.,1.],[1.,1.,1.]],[[2.,2.,2.],[2.,2.,2.]]]) - assert s - - def test_container(self): - # first index is the sequence and second index is the variable - s = self.seqn - assert s[0,0] == [1,1,1] - assert s[0,1] == [12,12,12] - assert s[1,0] == [2,2,2] - assert s[1,1] == [22,23,24] - assert s[1,1][1] == 23 - assert len(s) == 2 - - s = self.seq1 - assert s[0,0] == [1,1,1] - assert s[0,1] == [2,2,2] - assert s[0,0][0] == 1 - assert len(s) == 1 - - - - def test_value_select(self): - "test_value_select implemented but need to be checked" - seqn = self.seqn - a = seqn.value_select(1, 1, 2,True) - assert a - assert str(ValueSelect(seqn, 1, 1, 2)) == str(seqn.value_select(1,1,2, True)) - - def test_select_variable_int(self): - "test_select_variable_int implemented but need to be checked (index issue)" - # !!!!!!!! NEED to CHECK THE INDEX 0, 1 , ... or 1,2,.... - # what about identifiers ? - # Variable seems to start at 1 not 0 - s = self.seqn - #select variable 1 - select = s.select_variable([1], keep=True) - assert select[0,0] == [1] - assert select[1,0] == [2] - - def test_select_variable_real(self): - "test_select_variable_real implemented but need to be checked (index issue)" - # !!!!!!!! NEED to CHECK THE INDEX 0, 1 , ... or 1,2,.... - # what about identifiers ? 
- # Variable seems to start at 1 not 0 - s = self.seqrealn - #select variable 1 - select = s.select_variable([1], keep=True) - assert select[0,0] == [1.5] - assert select[1,0] == [2.5] - - - def test_select_individual(self): - #select one or several sequences - s = self.seqn - - # select all - select = s.select_individual([0,1], keep=True) - assert s.display() == select.markovian_sequences().display() - - #select first sequence only - select = s.select_individual([0], keep=True) - assert select[0,0] == [1, 1, 1] - assert select[0,1] == [12, 12, 12] - try: - select[1,0] - assert False - except: - assert True - - #select second sequence only - select = s.select_individual([1], keep=True) - assert select[0,0] == [2, 2, 2] - assert select[0,1] == [22, 23, 24] - try: - select[1,0] - assert False - except: - assert True - - def test_shift_seqn(self): - s = self.seqn - shifted = s.shift(1,2) - assert shifted[0,0] == [3,1,1] - assert shifted[0,1] == [14,12,12] - assert shifted[1,0] == [4,2,2] - assert shifted[1,1] == [24,23,24] - - - def test_shift_seq1(self): - s = self.seq1 - shifted = s.shift(1,2) - assert shifted[0,0] == [3, 1, 1] - assert shifted[0,1] == [4, 2, 2] - - def test_threshold_seq1(self): - s = self.seqn - thresholded = s.thresholding(1,10,"ABOVE") - for x in thresholded: - for v in x: - assert(v[0] <= 10) - - def test_threshold_seq(self): - s = Sequences([[[1.01,1.07],[2.01,2.07],[1.99,1.07],[2.41,2.07]],[[1.97,1.07],[1.98,2.07],[1.99,1.07],[2.00,2.07]]]) - thresholded = s.thresholding(1,1.99, "ABOVE") - for x in thresholded: - for v in x: - assert(v[0] <= 1.99) - thresholded = s.thresholding(1,1.99, "BELOW") - for x in thresholded: - for v in x: - assert(v[0] >= 1.99) - - assert s - def test_merge (self): - s1 = self.seqn - s2 = self.seqn - s3 = self.seq1 - - sall = s1.merge([s2]) - assert sall.nb_sequence == 4 - assert sall.nb_variable == 3 - - sall = s1.merge([s3]) - assert sall.nb_sequence == 3 - assert sall.nb_variable == 3 - - def 
test_merge_and_Merge(self): - s1 = self.seqn - s2 = self.seqn - - a = s1.merge([s2]) - b = s2.merge([s1]) - v = Merge(s1, s2) - - assert str(a) == str(b) - assert str(a) == str(v) - - def test_imcompatible_merge(self): - s1 = self.seqn - s3 = self.seq1 - try: - s1.merge(s3) - assert False - except: - assert True - - def test_merge_variable(self): - import copy - s1 = self.seqn - s2 = self.seqn - s3 = self.seq1 - - sall = s1.merge_variable([s2],1) # why 1 ? same result with 2 ! - assert sall.nb_sequence == 2 - assert sall.nb_variable == 6 - - # sall = s1.merge_variable([s3],1) - #assert sall.nb_sequence == 2 - #assert sall.nb_variable == 3 - - def test_merge_variable_and_MergeVariable(self): - s1 = self.seqn - s2 = self.seqn - s3 = self.seq1 - - a = s1.merge_variable([s2],1) - b = s2.merge_variable([s1],1) - v = MergeVariable(s1,s2) - - assert str(a) == str(b) - assert str(a) == str(v) - - - def test_cluster_step(self): - seq1 = Sequences([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) - assert str(Cluster(seq1, "Step", 1, 2)) == str(seq1.cluster_step(1, 2, True)) - seqn = Sequences([[[1, 2, 3], [1, 3, 1]], [[4, 5, 6], [7,8,9]]]) - assert str(Cluster(seqn, "Step", 1, 2)) == str(seqn.cluster_step(1, 2, True)) - - def test_cluster_limit(self): - seq1 = Sequences([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) - assert str(Cluster(seq1, "Limit", 1, [2])) == \ - str(seq1.cluster_limit(1, [2], True)) - seqn = Sequences([[[1, 2, 3], [1, 3, 1]], [[4, 5, 6], [7,8,9]]]) - assert str(Cluster(seqn, "Limit", 1, [2, 4, 6])) == \ - str(seqn.cluster_limit(1, [2, 4 ,6], True)) - - def test_transcode(self): - """This functionality need to be checked. - - See also the vector case!""" - seq = self.seqn - assert str(seq.transcode(1, [0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0o1,1,1,1,1,0,0], False))==\ - str(Transcode(seq, 1, [0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0o1,1,1,1,1,0,0])) - - def test_reverse(self): - """reverse to be checked. 
seems to give same output as input""" - s = self.seqn - s.reverse() - - - def test_max_length(self): - s = self.data - assert s.max_length==30 - - def test_get_max_value(self): - s = self.data - assert s.get_max_value(0)==2 - - def test_get_min_value(self): - s = self.data - assert s.get_min_value(0)==0 - - def test_get_length(self): - s = self.data - assert s.get_length(0)==30 - assert s.get_length(1)==22 - - def test_difference(self): - data = self.data - assert str(Difference(data, 1)) == str(data.difference(1, False)) - res = Difference(data, 1) - assert res.cumul_length == 50 - - def test_cumulate(self): - #see also test_cumulate for more tests - s = self.data - res = Cumulate(s) - assert res.cumul_length == 52 - - - def test_extract_vectors(self): - """see test_extract_vectors""" - ExtractVectors(self.data, "Length") - - def _test_index_parameter_extract(self): - """fixme: markovian_sequences should be in wrapper ? """ - aml = self.data.index_parameter_extract(0, 29).markovian_sequences() - mod = IndexParameterExtract(self.data, 0, MaxIndex=29) - assert str(aml) == str(mod) - - - def test_index_parameter_select(self): - """test to be done""" - pass - - def test_recurrence_time_sequences(self): - aml = self.data.recurrence_time_sequences(1, 1) - mod = RecurrenceTimeSequences(self.data, 1, 1) - assert str(aml.markovian_sequences()) == str(mod) - - def test_remove_run(self): - """test to be done""" - pass - def test_transform_position(self): - """test to be done""" - pass - def test_segmentation_extract(self): - """test to be done""" - pass - def test_variable_scaling(self): - """test to be done""" - pass - def test_remove_index_parameter(self): - """test to be done""" - self.data - - - def test_write_mtg(self): - import os - self.data.mtg_write('test.mtg', [1,2]) - os.remove('test.mtg') - - def test_SaveMTG(self): - SaveMTG(self.data,Filename='test.mtg', Type=['N']) - - def test_split(self): - #markovian sequences - data = 
Sequences(str(get_shared_data('vanille_m.seq'))) - Split(data, 2) - - def test_initial_run(self): - from openalea.sequence_analysis import ComputeInitialRun - #markovian sequences - data = Sequences(str(get_shared_data('vanille_m.seq'))) - ComputeInitialRun(data) +from .tools import robust_path as get_shared_data + + +@pytest.fixture +def build_data(): + """todo: check identifier output. should be a list""" + # build a list of 2 sequences with a variable that should be identical + # to sequences1.seq + data = Sequences( + [ + [ + 1, + 0, + 0, + 0, + 1, + 1, + 2, + 0, + 2, + 2, + 2, + 1, + 1, + 0, + 1, + 0, + 1, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 2, + 2, + 2, + 1, + ], + [0, 0, 0, 1, 1, 0, 2, 0, 2, 2, 2, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0], + ] + ) + assert data + + assert data.nb_sequence == 2 + assert data.nb_variable == 1 + assert data.cumul_length == 52 + assert data.max_length == 30 + + assert [0, 1] == data.get_identifiers() + + return data + + +@pytest.fixture +def build_seqn(): + return Sequences([[[1, 1, 1], [12, 12, 12]], [[2, 2, 2], [22, 23, 24]]]) + + +@pytest.fixture +def build_seq1(): + return Sequences([[1, 1, 1], [2, 2, 2]]) + + +@pytest.fixture +def build_seq_realn(): + return Sequences( + [ + [[1.5, 1.5, 1.5], [12.5, 12.5, 12.5]], + [[2.5, 2.5, 2.5], [22.5, 23.5, 24.5]], + ] + ) + + +def build_seq_wrong_identifiers(): + try: + s = Sequences([[1, 1, 1], [2, 2, 2]], Identifiers=[-1, 1]) + assert False + except: + assert True + + +def test_len(build_data): + seq = build_data + assert len(seq) == 2 + assert len(seq) == seq.nb_sequence + + +def test_extract(build_seqn): + # todo + seqn = build_seqn + assert seqn.extract_value(1) + assert seqn.extract_value(2) + + +def test_index_parameter_type(): + seq1 = Sequences([[1.0, 1, 1], [2.0, 2, 2.0]]) + assert IndexParameterType(seq1) == "IMPLICIT_TYPE" + seq1 = Sequences([[1.0, 1, 1], [2.0, 2, 2.0]], IndexParameterType="TIME") + assert IndexParameterType(seq1) == "TIME" + seq1 = Sequences([[1.0, 1, 
1], [2.0, 2, 2.0]], IndexParameterType="POSITION") + assert IndexParameterType(seq1) == "POSITION" + + +def test_constructors(): + # heterogeneous or homogeneous type + seq1 = Sequences([1, 2, 3, 4]) + seq1 = Sequences([1, 2, 3, 4.0]) + assert seq1.nb_sequence == 1 + assert seq1.nb_variable == 1 + + # single sequence multivariate + seq2 = Sequences([[1, 2], [3, 4], [5, 6]]) + assert seq2.nb_sequence == 1 + assert seq2.nb_variable == 2 + # ambiguous case (length>5) + seq2 = Sequences([[1, 2, 3, 4, 5, 6], [3, 4, 3, 4, 5, 6], [5, 6, 4, 5, 6, 7]]) + assert seq2.nb_sequence == 3 + assert seq2.nb_variable == 1 + + # univariates sequences + seq3 = Sequences([[1, 2], [3, 4], [5, 6, 7]]) + assert seq3.nb_sequence == 3 + assert seq3.nb_variable == 1 + + # general case + seq4 = Sequences( + [[[1, 2], [3, 4]], [[21, 22], [23, 24]], [[31, 32], [33, 34], [35, 36]]] + ) + assert seq4.nb_sequence == 3 + assert seq4.nb_variable == 2 + + seq4 = Sequences( + [[[1, 2], [3, 4]], [[21, 22], [23, 24]], [[31, 32], [33, 34], [35, 36]]], + VertexIdentifiers=[[1, 2], [3, 4], [5, 6, 7]], + Identifiers=[1, 2, 3], + ) + + seq4 = Sequences( + [[[1, 2], [3, 4]], [[21, 22], [23, 24]], [[31, 32], [33, 34], [35, 36]]], + IndexParameterType="POSITION", + IndexParameter=[[0, 1, 10], [2, 3, 11], [4, 5, 6, 12]], + ) + + seq4 = Sequences( + [[[1, 2], [3, 4]], [[21, 22], [23, 24]], [[31, 32], [33, 34], [35, 36]]], + IndexParameterType="TIME", + IndexParameter=[[0, 1], [2, 3], [4, 5, 6]], + ) + + +def test_constructor_one_sequence(): + # two sequences with 2 variables (int) + s = Sequences([[1, 1, 1], [2, 2, 2]]) + assert s + # two sequences with 2 variables (real) + s = Sequences([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]) + assert s + # two sequences with 2 variables mix of (real and int) + # works because the first one is float so all others are assume to be float as well + s = Sequences([[1.0, 1.0, 1.0], [2.0, 2.0, 2]]) + assert s + # here it fails because the first number is int but others may be float + try: 
+ s = Sequences([[1, 1.0, 1.0], [2.0, 2.0, 2]]) + assert False + except: + assert True + + +def test_constructor_two_sequences(): + # two sequences with 2 variables (int) + s = Sequences([[[1, 1, 1], [12, 12, 12]], [[2, 2, 2], [22, 23, 24]]]) + assert s + s = Sequences( + [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]] + ) + assert s + + +def test_container(build_seq1, build_seqn): + # first index is the sequence and second index is the variable + s = build_seqn + assert s[0, 0] == [1, 1, 1] + assert s[0, 1] == [12, 12, 12] + assert s[1, 0] == [2, 2, 2] + assert s[1, 1] == [22, 23, 24] + assert s[1, 1][1] == 23 + assert len(s) == 2 + + s = build_seq1 + assert s[0, 0] == [1, 1, 1] + assert s[0, 1] == [2, 2, 2] + assert s[0, 0][0] == 1 + assert len(s) == 1 + + +def test_value_select(build_seqn): + "test_value_select implemented but need to be checked" + seqn = build_seqn + a = seqn.value_select(1, 1, 2, True) + assert a + assert str(ValueSelect(seqn, 1, 1, 2)) == str(seqn.value_select(1, 1, 2, True)) + + +def test_select_variable_int(build_seqn): + "test_select_variable_int implemented but need to be checked (index issue)" + # !!!!!!!! NEED to CHECK THE INDEX 0, 1 , ... or 1,2,.... + # what about identifiers ? + # Variable seems to start at 1 not 0 + s = build_seqn + # select variable 1 + select = s.select_variable([1], keep=True) + assert select[0, 0] == [1] + assert select[1, 0] == [2] + + +def test_select_variable_real(build_seq_realn): + "test_select_variable_real implemented but need to be checked (index issue)" + # !!!!!!!! NEED to CHECK THE INDEX 0, 1 , ... or 1,2,.... + # what about identifiers ? 
+ # Variable seems to start at 1 not 0 + s = build_seq_realn + # select variable 1 + select = s.select_variable([1], keep=True) + assert select[0, 0] == [1.5] + assert select[1, 0] == [2.5] + + +def test_select_individual(build_seqn): + # select one or several sequences + s = build_seqn + + # select all + select = s.select_individual([0, 1], keep=True) + assert s.display() == select.markovian_sequences().display() + + # select first sequence only + select = s.select_individual([0], keep=True) + assert select[0, 0] == [1, 1, 1] + assert select[0, 1] == [12, 12, 12] + try: + select[1, 0] + assert False + except: + assert True + + # select second sequence only + select = s.select_individual([1], keep=True) + assert select[0, 0] == [2, 2, 2] + assert select[0, 1] == [22, 23, 24] + try: + select[1, 0] + assert False + except: + assert True + + +def test_shift_seqn(build_seqn): + s = build_seqn + shifted = s.shift(1, 2) + assert shifted[0, 0] == [3, 1, 1] + assert shifted[0, 1] == [14, 12, 12] + assert shifted[1, 0] == [4, 2, 2] + assert shifted[1, 1] == [24, 23, 24] + + +def test_shift_seq1(build_seq1): + s = build_seq1 + shifted = s.shift(1, 2) + assert shifted[0, 0] == [3, 1, 1] + assert shifted[0, 1] == [4, 2, 2] + + +def test_threshold_seq1(build_seqn): + s = build_seqn + thresholded = s.thresholding(1, 10, "ABOVE") + for x in thresholded: + for v in x: + assert v[0] <= 10 + + +def test_threshold_seq(): + s = Sequences( + [ + [[1.01, 1.07], [2.01, 2.07], [1.99, 1.07], [2.41, 2.07]], + [[1.97, 1.07], [1.98, 2.07], [1.99, 1.07], [2.00, 2.07]], + ] + ) + thresholded = s.thresholding(1, 1.99, "ABOVE") + for x in thresholded: + for v in x: + assert v[0] <= 1.99 + thresholded = s.thresholding(1, 1.99, "BELOW") + for x in thresholded: + for v in x: + assert v[0] >= 1.99 + + assert s + + +def test_merge(build_seqn, build_seq1): + s1 = build_seqn + s2 = build_seqn + s3 = build_seq1 + + sall = s1.merge([s2]) + assert sall.nb_sequence == 4 + assert sall.nb_variable == 3 + + 
sall = s1.merge([s3]) + assert sall.nb_sequence == 3 + assert sall.nb_variable == 3 + + +def test_merge_and_Merge(build_seqn): + s1 = build_seqn + s2 = build_seqn + + a = s1.merge([s2]) + b = s2.merge([s1]) + v = Merge(s1, s2) + + assert str(a) == str(b) + assert str(a) == str(v) + + +def test_imcompatible_merge(build_seqn, build_seq1): + s1 = build_seqn + s3 = build_seq1 + try: + s1.merge(s3) + assert False + except: + assert True + + +def test_merge_variable(build_seqn, build_seq1): + s1 = build_seqn + s2 = build_seqn + s3 = build_seq1 + + sall = s1.merge_variable([s2], 1) # why 1 ? same result with 2 ! + assert sall.nb_sequence == 2 + assert sall.nb_variable == 6 + + # sall = s1.merge_variable([s3],1) + # assert sall.nb_sequence == 2 + # assert sall.nb_variable == 3 + + +def test_merge_variable_and_MergeVariable(build_seqn, build_seq1): + s1 = build_seqn + s2 = build_seqn + s3 = build_seq1 + + a = s1.merge_variable([s2], 1) + b = s2.merge_variable([s1], 1) + v = MergeVariable(s1, s2) + + assert str(a) == str(b) + assert str(a) == str(v) + + +def test_cluster_step(): + seq1 = Sequences([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) + assert str(Cluster(seq1, "Step", 1, 2)) == str(seq1.cluster_step(1, 2, True)) + seqn = Sequences([[[1, 2, 3], [1, 3, 1]], [[4, 5, 6], [7, 8, 9]]]) + assert str(Cluster(seqn, "Step", 1, 2)) == str(seqn.cluster_step(1, 2, True)) + + +def test_cluster_limit(): + seq1 = Sequences([[1, 2, 3], [1, 3, 1], [4, 5, 6]]) + assert str(Cluster(seq1, "Limit", 1, [2])) == str(seq1.cluster_limit(1, [2], True)) + seqn = Sequences([[[1, 2, 3], [1, 3, 1]], [[4, 5, 6], [7, 8, 9]]]) + assert str(Cluster(seqn, "Limit", 1, [2, 4, 6])) == str( + seqn.cluster_limit(1, [2, 4, 6], True) + ) + + +def test_transcode(build_seqn): + """This functionality need to be checked. 
+ + See also the vector case!""" + seq = build_seqn + assert str( + seq.transcode( + 1, + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0o1, 1, 1, 1, 1, 0, 0], + False, + ) + ) == str( + Transcode( + seq, + 1, + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0o1, 1, 1, 1, 1, 0, 0], + ) + ) + + +def test_reverse(build_seqn): + """reverse to be checked. seems to give same output as input""" + s = build_seqn + s.reverse() + + +def test_max_length(build_data): + s = build_data + assert s.max_length == 30 + + +def test_get_max_value(build_data): + s = build_data + assert s.get_max_value(0) == 2 + + +def test_get_min_value(build_data): + s = build_data + assert s.get_min_value(0) == 0 + + +def test_get_length(build_data): + s = build_data + assert s.get_length(0) == 30 + assert s.get_length(1) == 22 + + +def test_difference(build_data): + data = build_data + assert str(Difference(data, 1)) == str(data.difference(1, False)) + res = Difference(data, 1) + assert res.cumul_length == 50 + + +def test_cumulate(build_data): + # see also test_cumulate for more tests + s = build_data + res = Cumulate(s) + assert res.cumul_length == 52 + + +def test_extract_vectors(build_data): + """see test_extract_vectors""" + ExtractVectors(build_data, "Length") + + +def _test_index_parameter_extract(build_data): + """fixme: markovian_sequences should be in wrapper ?""" + aml = build_data.index_parameter_extract(0, 29).markovian_sequences() + mod = IndexParameterExtract(build_data, 0, MaxIndex=29) + assert str(aml) == str(mod) + + +def test_index_parameter_select(): + """test to be done""" + pass + + +def test_recurrence_time_sequences(build_data): + aml = build_data.recurrence_time_sequences(1, 1) + mod = RecurrenceTimeSequences(build_data, 1, 1) + assert str(aml.markovian_sequences()) == str(mod) + + +def test_remove_run(): + """test to be done""" + pass + + +def test_transform_position(): + """test to be done""" + pass + + +def test_segmentation_extract(): + """test to be done""" + pass + + 
+def test_variable_scaling(): + """test to be done""" + pass + + +def test_remove_index_parameter(): + """test to be done""" + pass + + +def test_write_mtg(build_data): + import os + + build_data.mtg_write("test.mtg", [1, 2]) + os.remove("test.mtg") + + +def test_SaveMTG(build_data): + SaveMTG(build_data, Filename="test.mtg", Type=["N"]) + + +def test_split(): + # markovian sequences + data = Sequences(str(get_shared_data("vanille_m.seq"))) + Split(data, 2) + + +def test_initial_run(): + from openalea.sequence_analysis import ComputeInitialRun + + # markovian sequences + data = Sequences(str(get_shared_data("vanille_m.seq"))) + ComputeInitialRun(data) + """ @@ -486,6 +555,3 @@ def test_initial_run(self): seq.remove_index_parameter seq.round """ - -if __name__ == "__main__": - runTestClass(Test()) diff --git a/test/test_time_events.py b/test/test_time_events.py index 697c377..eee6567 100644 --- a/test/test_time_events.py +++ b/test/test_time_events.py @@ -15,8 +15,8 @@ from openalea.stat_tool.cluster import Cluster from openalea.stat_tool.cluster import Transcode, Cluster -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +from .tools import interface +from .tools import runTestClass, robust_path as get_shared_data def TimeEventsData(): diff --git a/test/test_top_parameters.py b/test/test_top_parameters.py index 0b6a2d7..848f96d 100644 --- a/test/test_top_parameters.py +++ b/test/test_top_parameters.py @@ -17,8 +17,8 @@ -from tools import interface -from tools import runTestClass +from .tools import interface +from .tools import runTestClass def TopParametersData(): diff --git a/test/test_tops.py b/test/test_tops.py index 885ee96..5cd10b5 100644 --- a/test/test_tops.py +++ b/test/test_tops.py @@ -1,17 +1,18 @@ -""" Test tops data structure +"""Test tops data structure .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr .. 
todo:: to be done """ + __revision__ = "$Id$" from openalea.stat_tool import _stat_tool from openalea.sequence_analysis import _sequence_analysis from openalea.sequence_analysis import * -from tools import interface -from tools import runTestClass, robust_path as get_shared_data +from .tools import interface +from .tools import runTestClass, robust_path as get_shared_data def TopsData(): @@ -22,19 +23,15 @@ def TopsData(): class Test(interface): - """a simple unittest class - + """a simple unittest class""" - """ def __init__(self): - interface.__init__(self, - self.build_data(), - str(get_shared_data("test_tops1.dat")), - Tops) + interface.__init__( + self, self.build_data(), str(get_shared_data("test_tops1.dat")), Tops + ) def build_data(self): - - return Tops(str(get_shared_data('test_tops1.dat'))) + return Tops(str(get_shared_data("test_tops1.dat"))) def _test_empty(self): self.empty() @@ -52,15 +49,15 @@ def _test_constructor_arrayn(self): pass def _test_constructor_array1(self): - #print '--------------' - #top = Tops([1,2,3,4,5], Identifiers=[1]) - print('------------') - #print type(top) - print('---------') + # print '--------------' + # top = Tops([1,2,3,4,5], Identifiers=[1]) + print("------------") + # print type(top) + print("---------") print(top) - #print self.data - #top = Tops('data/tops1.dat') - #print top + # print self.data + # top = Tops('data/tops1.dat') + # print top def test_print(self): self.print_data() diff --git a/test/test_transcode.py b/test/test_transcode.py index 36bd49c..202cb27 100644 --- a/test/test_transcode.py +++ b/test/test_transcode.py @@ -1,9 +1,10 @@ -""" Test on transcode +"""Test on transcode .. author:: Thomas Cokelaer, Thomas.Cokelaer@inria.fr .. 
todo:: to be done """ + __revision__ = "$Id$" @@ -21,8 +22,3 @@ def test_transcode_semi_markov(): """test to be done""" pass -if __name__ == "__main__": - test_transcode_vectors() - test_transcode_sequences() - test_transcode_semi_markov() - \ No newline at end of file diff --git a/test/tools.py b/test/tools.py index 36f45a0..167cb86 100644 --- a/test/tools.py +++ b/test/tools.py @@ -6,7 +6,8 @@ from openalea.stat_tool import Simulate from openalea.stat_tool.plot import DISABLE_PLOT from openalea.stat_tool.output import Display, Save -DISABLE_PLOT=True + +DISABLE_PLOT = True from pathlib import Path from openalea.sequence_analysis import get_shared_data, get_shared_data_path @@ -23,12 +24,14 @@ __revision__ = "$Id$" + def runTestClass(myclass): - functions = [x for x in dir(myclass) if x.startswith('test')] + functions = [x for x in dir(myclass) if x.startswith("test")] for function in functions: getattr(myclass, function)() -class interface(): + +class interface: """Interface to be used by test file that perform tests on the following data structure: compound, convolution, mixture, histogram, vector @@ -39,7 +42,7 @@ class interface(): :Usage: In you test file, add :: - >>> from tools import interface + >>> from .tools import interface Then, if we consider the Compound class case, create a class as follows:: @@ -59,11 +62,12 @@ def test_empty(self): self.empty() """ - def __init__(self, data=None, filename=None, Structure=None): - self.data = data - self.filename = filename - self.structure = Structure - set_seed(0) + + # def __init__(self, data=None, filename=None, Structure=None): + # self.data = data + # self.filename = filename + # self.structure = Structure + # set_seed(0) def build_data(self): raise NotImplementedError() @@ -102,7 +106,7 @@ def display(self): data = self.data data.display() Display(data) - assert data.display()==Display(data) + assert data.display() == Display(data) def display_versus_ascii_write(self): """check that display is equivalent to 
ascii_write""" @@ -115,7 +119,7 @@ def display_versus_str(self): assert Display(data) == s def plot(self): - """run plotting routines """ + """run plotting routines""" if DISABLE_PLOT == False: self.data.plot() @@ -126,57 +130,56 @@ def save(self, Format=None, skip_reading=False): .. todo:: This is surely a bug. to be checked""" - c1 = self.data try: - os.remove('test1.dat') + os.remove("test1.dat") except: pass try: - os.remove('test2.dat') + os.remove("test2.dat") except: pass if Format is None: - c1.save('test1.dat') - Save(c1, 'test2.dat') + c1.save("test1.dat") + Save(c1, "test2.dat") else: - c1.save('test1.dat', Format="Data") - Save(c1, 'test2.dat', Format="Data") + c1.save("test1.dat", Format="Data") + Save(c1, "test2.dat", Format="Data") if skip_reading: pass else: - c1_read = self.structure('test1.dat') - c2_read = self.structure('test2.dat') + c1_read = self.structure("test1.dat") + c2_read = self.structure("test2.dat") print(c1_read) assert c1 and c1_read and c2_read assert str(c1_read) == str(c2_read) - #os.remove('test1.dat') - #os.remove('test2.dat') + # os.remove('test1.dat') + # os.remove('test2.dat') def plot_write(self): h = self.data - h.plot_write('test', 'title') + h.plot_write("test", "title") def file_ascii_write(self): h = self.data - h.file_ascii_write('test.dat', True) - os.remove('test.dat') + h.file_ascii_write("test.dat", True) + os.remove("test.dat") def file_ascii_data_write(self): h = self.data - h.file_ascii_data_write('test.dat', True) - os.remove('test.dat') + h.file_ascii_data_write("test.dat", True) + os.remove("test.dat") def spreadsheet_write(self): h = self.data - h.spreadsheet_write('test.dat') - os.remove('test.dat') + h.spreadsheet_write("test.dat") + os.remove("test.dat") def survival_ascii_write(self): d = self.data @@ -184,7 +187,7 @@ def survival_ascii_write(self): def survival_plot_write(self): d = self.data - d.survival_plot_write('test','test') + d.survival_plot_write("test", "test") def 
survival_file_ascii_write(self): d = self.data @@ -192,8 +195,8 @@ def survival_file_ascii_write(self): def survival_spreadsheet_write(self): d = self.data - d.survival_spreadsheet_write('test.xsl') - os.remove('test.xsl') + d.survival_spreadsheet_write("test.xsl") + os.remove("test.xsl") def simulate(self): """Test the simulate method""" @@ -217,14 +220,5 @@ def test_extract_data(self): def robust_path(filename): - p = get_shared_data_path(sa) - if p is not None: - # module in develop mode? - return get_shared_data(filename) - - p = Path(sa.__path__[0]) - if 'src' in str(p): - root_pkg = p/'../../..' - data = get_shared_data_path(root_pkg) - return os.path.join(data,filename) - + p = get_shared_data(filename) + return p