import libtbx.phil

# Master PHIL definition for the `pandda` parameter scope.
#
# The scope is organised into the following sub-scopes:
#   input      - dataset discovery (directory/file styles, labelling regexes),
#                filter and reference datasets, per-dataset flags
#   output     - output directory, which maps are written, pickling and
#                developer output switches
#   flags      - which stages run and how existing datasets / statistical
#                maps are treated
#   shortcuts  - switches that set groups of parameters to defaults
#   params     - algorithm parameters (checks, alignment, maps, masks,
#                filtering, excluding, analysis, z_map, blob_search,
#                background_correction)
#   results    - ordering of events and sites in the output
#   exit_flags - switches for terminating the program early
#
# After the `pandda` scope, the definition pulls in
# `giant.phil.settings_phil` through a PHIL `include scope` statement, which
# is why the string is parsed with `process_includes=True`.
#
# NOTE: the definition is a *non-raw* Python string, so Python consumes
# backslash escapes before PHIL sees the text (a `\'` would reach PHIL as a
# bare `'`). Help strings that contain an apostrophe are therefore written
# with double quotes rather than as single-quoted strings with an escaped
# apostrophe.
pandda_phil = libtbx.phil.parse("""
pandda
{
    input
        .help = "File names"
    {
        data_dirs = None
            .type = str
        pdb_style = final.pdb
            .type = str
        mtz_style = None
            .type = str
        lig_style = *.cif
            .type = str
        max_new_datasets = 500
            .help = "Maximum number of new datasets that can be processed for an analysis -- used to break the analysis into chunks when processing large numbers of datasets"
            .type = int
        regex
            .help = "Advanced dataset labelling regexs"
        {
            dir_regex = None
                .type = str
            pdb_regex = None
                .type = str
            mtz_regex = None
                .type = str
        }
        filter
            .help = "Provide a dataset to filter input datasets against"
        {
            pdb = None
                .type = path
        }
        reference
            .help = "Manually define reference dataset to align+scale all other datasets to"
        {
            pdb = None
                .type = path
            mtz = None
                .type = path
            structure_factors = None
                .type = str
        }
        flags
            .help = "Flags for individual datasets"
        {
            exclude_from_zmap_analysis = None
                .help = "Don't analyse these datasets, only use them to build the distributions - comma separated list of dataset tags"
                .type = str
            exclude_from_characterisation = None
                .help = "Don't use these datasets to build density distributions, only analyse them - comma separated list of dataset tags"
                .type = str
            ignore_datasets = None
                .help = "Reject these datasets, don't even load them - comma separated list of dataset tags"
                .type = str
            reprocess_datasets = None
                .help = "Selection of existing datasets to reprocess (treat as new datasets) - comma separated list of dataset tags. Setting this will set flags.existing_datasets=reload."
                .type = str
        }
    }
    output
        .help = "Output directory"
    {
        out_dir = './pandda'
            .type = path
        new_analysis_dir = False
            .help = "Create a new analysis directory to prevent overwriting previous results"
            .type = bool
        dataset_prefix = ''
            .help = "Prefix to be attached to each dataset name"
            .type = str
        maps
            .help = "Control which maps are output by the program"
        {
            write_z_maps = none *interesting all
                .help = "Output the z-maps in the native frame of datasets for selected datasets"
                .type = choice
            write_mean_map = none *interesting all
                .help = "Output the mean map in the native frame of datasets for selected datasets"
                .type = choice
            write_dataset_map = *none interesting all
                .help = "Output the analysed maps in the native frame of datasets for selected datasets"
                .type = choice
            write_statistical_maps = *none reference
                .help = "Output statistical maps in the native frame of datasets"
                .type = choice
        }
        pickling
            .help = "Pickle Settings"
            .expert_level = 1
        {
            pickle_complete_pandda = False
                .help = "Pickle the entire PANDDA object for reloading - takes a lot of space"
                .type = bool
            pickle_map_analysers = False
                .help = "Pickle map analysers"
                .type = bool
            pickle_dataset_maps = False
                .help = "Pickle the dataset maps as part of the dataset pickle object"
                .type = bool
        }
        developer
            .help = "Development Settings (Not needed by most users)"
            .expert_level = 3
        {
            write_all = False
                .help = "Activate all developer flags"
                .type = bool
            write_reference_frame_maps = False
                .help = "Output maps for datasets in the reference coordinate frame"
                .type = bool
            write_reference_frame_grid_masks = False
                .help = "Output the grid masks which control which areas are analysed"
                .type = bool
            write_reference_frame_statistical_maps = False
                .help = "Output the statistical maps in the reference coordinate frame"
                .type = bool
            write_reference_frame_all_z_map_types = False
                .help = "Output all possible types of Z-maps"
                .type = bool
        }
    }
    flags
        .help = "control which datasets are loaded and processed, and when statistical maps are calculated"
    {
        stages = *add_datasets *density_characterisation *zmap_analysis
            .help = "Which parts of the program should be turned on? add_datasets: find and add new datasets (not needed to reload old datasets). density_characterisation: perform statistical density analysis. zmap_analysis: identify local events in each dataset."
            .type = choice(multi=True)
        existing_datasets = reprocess *reload ignore
            .help = "What to do with previously-analysed datasets? reprocess: old datasets are treated as new and processed fully. reload: events identified in old datasets will be included in results. ignore: ..."
            .type = choice
        recalculate_statistical_maps = yes no *extend
            .help = "Set whether statistical maps are re-used from previous runs. If No, it looks for existing statistical maps and uses those (reverts to Yes if none are found). If Extend is chosen, existing maps are used, but additional maps are calculated at high and low resolutions if the data extends beyond the current range."
            .type = choice
        density_analysis_for = all_resolutions *datasets
            .help = "Select which resolutions density is analysed at - characterise the density always? or only when there is a dataset to be analysed?"
            .type = choice
    }
    shortcuts
        .help = "Shortcuts to set sets of parameters to defaults"
    {
        run_in_single_dataset_mode = False
            .help = "Set the default parameters to allow the characterisation to be performed using a single dataset (no variation analysis)"
            .type = bool
    }
    params
        .help = "Algorithm Parameters"
    {
        checks
            .help = "Checks on the mtz file data provided for each dataset"
        {
            all_data_are_valid_values = True
                .help = "Check that all reflections in the diffraction data have valid values (are not zero or N/A)"
                .type = bool
            low_resolution_completeness = 4.0
                .help = "Check that diffraction data is 100% complete up to this resolution cutoff. Missing reflections at low resolution may seriously degrade analysis quality. Set to None to turn off this check."
                .type = float
        }
        alignment
            .help = "Settings to control the alignment of the structures"
        {
            method = global *local
                .help = "How should the structures be aligned? 'global' is fast, but requires high structural conservation, whereas local is slower and accounts for structural variation"
                .type = choice
        }
        maps
            .help = "Settings to control how maps are generated and analysed"
        {
            structure_factors = None
                .type = str
                .multiple = True
            use_b_factor_scaling = True
                .help = "Use B-factor-scaled diffraction data"
                .type = bool
            scaling = none *sigma volume
                .type = choice
            resolution_factor = 0.25
                .help = 'Sampling factor for fft-ing the maps'
                .type = float
            grid_spacing = 0.5
                .help = 'Spacing of the grid points in the sampled maps (A) - fixed across resolutions'
                .type = float
            padding = 3
                .help = "Padding around the edge of the maps (A)"
                .type = float
        }
        masks
            .help = "Parameters to control the masking of grid points around the protein"
        {
            pdb = None
                .help = "A PDB to mask the grid against (if none provided, use reference dataset)"
                .type = str
                .multiple = False
            align_mask_to_reference = True
                .help = "If masks.pdb is supplied, does it require alignment to the reference structure? If selecting a fragment of the structure, masks.pdb must already be aligned prior to running pandda (can't align fragments)."
                .type = bool
            inner_mask = 1.8
                .help = "Points are masked within this distance of protein atoms"
                .type = float
            inner_mask_symmetry = 3.0
                .help = "Points are masked within this distance of neighbouring symmetry copies of protein atoms"
                .type = float
            outer_mask = 6
                .help = "Points are masked outside this distance of protein atoms"
                .type = float
        }
        filtering
            .help = "Settings to control when datasets are rejected from the analysis"
        {
            max_rmsd_to_reference = 1.5
                .help = "Reject datasets that have a calpha rmsd of greater than this to the reference (after global alignment)"
                .type = float
                .multiple = False
            max_rfree = 0.4
                .help = 'Maximum allowed rfree for a structure (datasets above this are rejected)'
                .type = float
                .multiple = False
            flags {
                same_space_group_only = True
                    .help = "Reject datasets that are a different spacegroup to the reference/filter dataset - NOT YET IMPLEMENTED - MUST BE SET TO TRUE"
                    .type = bool
                similar_models_only = False
                    .help = "Reject datasets that have a different model composition to the reference/filter dataset. All models must have the same number and identity of atoms."
                    .type = bool
            }
        }
        excluding
            .help = "Parameters to control when datasets are automatically excluded from characterisation"
        {
            max_wilson_plot_z_score = 5.0
                .help = "Maximum Z-score for RMSD of dataset diffraction wilson curve compared to the reference dataset. Z-scores calculated relative to the rest of the datasets."
                .type = float
                .multiple = False
        }
        analysis
            .help = "Settings to control the selection of datasets"
        {
            min_build_datasets = 40
                .help = 'Minimum number of datasets needed to build distributions'
                .type = int
            max_build_datasets = 60
                .help = 'Maximum number of datasets used to build distributions'
                .type = int
            dynamic_res_limits = True
                .help = 'Allow the analysed resolution limits to change depending on the dataset resolution ranges'
                .type = bool
            high_res_upper_limit = 0.0
                .help = 'Highest resolution limit (maps are never calculated above this limit)'
                .type = float
            high_res_lower_limit = 4.0
                .help = 'Lowest resolution limit (datasets below this are ignored)'
                .type = float
            high_res_increment = 0.05
                .help = 'Increment of resolution shell for map analysis'
                .type = float
        }
        z_map
            .help = "Settings to control the calculation of z-maps"
        {
            map_type = naive uncertainty *adjusted+uncertainty
                .help = 'Type of Z-map to calculate'
                .type = choice
        }
        blob_search
            .help = "Settings to control the finding of blobs"
            .expert_level = 1
        {
            contour_level = 2.5
                .help = 'Contour level when looking for blobs'
                .type = float
            negative_values = False
                .help = 'Look for large negative values as well?'
                .type = bool
            min_blob_volume = 10.0
                .help = 'Blob volume filter for detecting blobs'
                .type = float
            min_blob_z_peak = 3.0
                .help = 'Blob Z-peak filter for detecting blobs'
                .type = float
        }
        background_correction
            .help = "Parameters to control the estimation of feature background corrections"
            .expert_level = 3
        {
            min_bdc = 0.0
                .type = float
                .help = 'Minimum background correction estimate'
            max_bdc = 1.0
                .type = float
                .help = 'Maximum background correction estimate'
            increment = 0.01
                .type = float
                .help = 'Resolution of background correction estimation'
            output_multiplier = 1.0
                .type = float
                .help = 'Empirical multiplier to be applied to the contrast-estimated value of 1-BDC'
        }
    }
    results
        .help = "Change ordering/filtering of the output data"
    {
        events {
            order_by = z_peak z_mean *cluster_size
                .help = "How should events be ordered within each site?"
                .type = choice
        }
        sites {
            order_by = *num_events
                .help = "How should sites be ordered?"
                .type = choice
        }
    }
    exit_flags
        .help = "Flags for terminating the program early"
    {
        dry_run = False
            .help = "Find input files on disk, but exit before loading any datasets."
            .type = bool
        setup_only = False
            .help = "Load datasets and create grid partition, but exit before analysis."
            .type = bool
        calculate_first_mean_map_only = False
           .help = "Will calculate the highest resolution mean map and then exit - used for initial reference modelling."
            .type = bool
    }
}
include scope giant.phil.settings_phil
""", process_includes=True)