xboa
_voronoi_weighting.py
Go to the documentation of this file.
1 #This file is a part of xboa
2 #
3 #xboa is free software: you can redistribute it and/or modify
4 #it under the terms of the GNU General Public License as published by
5 #the Free Software Foundation, either version 3 of the License, or
6 #(at your option) any later version.
7 #
8 #xboa is distributed in the hope that it will be useful,
9 #but WITHOUT ANY WARRANTY; without even the implied warranty of
10 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 #GNU General Public License for more details.
12 #
13 #You should have received a copy of the GNU General Public License
14 #along with xboa in the doc folder. If not, see
15 #<http://www.gnu.org/licenses/>.
16 
17 """
18 VoronoiWeighting class should be imported directly from xboa.bunch.weighting
19 """
20 
21 import math
22 import bisect
23 
24 try:
25  import numpy
26 except ImportError:
27  pass
28 try:
29  import scipy.spatial
30 except ImportError:
31  pass
32 
33 try:
34  import ROOT
35 except ImportError:
36  pass
37 
38 import xboa.common as common
39 
40 class VoronoiWeighting(object):
41  """
42  VoronoiWeighting class enables the user to set statistical weights of Hits
43  in a Bunch based on how close they are to neighbouring Hits. The basic
44  principle is that if a Hit is in a sparsely populated region of phase space,
45  but the desired probability distribution would have the Hit in a more
46  densely populated region, we can apply a greater statistical weight to that
47  Hit (i.e. we "count" the Hit more than once in e.g. moment calculations
48  etc).
49 
50  In order to calculate how sparsely populated a particular region is, xboa
51  uses a "Voronoi" tesselation. xboa determines the nearest Hits surrounding a
52  given Hit, draws lines to those neighbours and then bisects the lines. This
53  generates a closed region around the Hit that is closer to that Hit than any
54  other.
55 
56  xboa calculates the "content" i.e. area/volume/hypervolume of the Voronoi
57  tile for each Hit. Hits which sit in a tile with a larger content are
58  assumed to be in a more sparsely populated region of phase space, while
59  those that sit in a region with a smaller content are assumed to be in a
60  less sparsely populated region of phase space.
61 
62  xboa assigns weights to each Hit according to a multivariate gaussian. In
63  order to prevent Hits on the edge of the distribution from acquiring a
64  disproportionate weighting, it is possible to define a bound around the
65  distribution. Hits outside the bound are disregarded. A few points are
66  defined on the bound, which prevent large Voronoi cells from being
67  generated.
68  """
69 
70  def __init__(self, weight_variables, weight_ellipse,
71  weight_mean = None, voronoi_bound = None):
72  """
73  Initialise the VoronoiWeighting
74  - weight_variables: list of string variable names for call to
75  Bunch.get_hit_variable(...). Should have a length corresponding to the
76  VoronoiWeighting dimension.
77  - weight_ellipse: numpy.array corresponding to the covariance matrix of
78  the desired multivariate gaussian. Should have numpy.shape like
79  (dimension, dimension) where dimension is the dimension of the
80  VoronoiWeighting.
81  - weight_mean: numpy.array corresponding to the mean of the desired
82  multivariate gaussian. If set to None, defaults to array of 0s. Should
83  have numpy.shape like (dimension) where dimension is the dimension of
84  the VoronoiWeighting.
85  - voronoi_bound: BoundingEllipse, corresponding to the maximum bound at
86  which Hits are accepted in the VoronoiWeighting. Hits outside this
87  bound will have weight set to 0. The ellipse bounding_points are used
88  as an external boundary to prevent Voronoi tesselations from getting a
89  big tile content (i.e. area/volume/hypervolume).
90 
91  Requires numpy and scipy to be installed.
92 
93  Returns a VoronoiWeighting object.
94  """
95  common.has_numpy()
96  common.has_scipy()
97  self.dim = len(weight_variables)
98  if weight_mean == None:
99  weight_mean = numpy.array([0.]*self.dim)
100  if (self.dim,) != numpy.shape(weight_mean):
101  raise ValueError("weight_mean shape "+\
102  str(numpy.shape(weight_mean))+" should be "+\
103  str((self.dim,))
104  )
105  if numpy.shape(weight_ellipse) != (self.dim, self.dim):
106  raise ValueError("weight_ellipse shape "+\
107  str(numpy.shape(weight_ellipse))+" should be "+\
108  str((self.dim, self.dim))
109  )
110  self.voronoi_bound = None
111  if voronoi_bound != None:
112  if voronoi_bound.dim != self.dim:
113  raise ValueError("voronoi_bound dimension should be same as "+\
114  "VoronoiWeighting dimension")
115 
116  self.voronoi_bound = voronoi_bound
117  self.weight_variables = weight_variables
118  self.weight_ellipse_inv = numpy.linalg.inv(weight_ellipse)
119  self.weight_ellipse_det = numpy.linalg.det(weight_ellipse)
120  self.weight_mean = weight_mean
121  self.weight_list = None
122  self.tesselation = None
123  self.tile_content_list = None
124  self.real_points = None
126  def apply_weights(self, bunch, global_cut):
127  """
128  Apply a weighting \f$w_i\f$ to each Hit in bunch corresponding to\n
129  \f$ w_i = c_i f(x_i) \f$\n
130  where \f$c_i\f$ is the content of each Voronoi tile and \f$f(x_i)\f$ is
131  the value of a multivariate gaussian evaluated at \f$x_i\f$.
132  - bunch: xboa.bunch.Bunch object. Evaluate weights for all Hits in bunch
133  - global_cut: if set to True, weighting will be applied to the hit's
134  global_weight. If set to False, weighting will be applied to the hit's
135  local_weight.
136  Returns None (the bunch is weighted "in-place")
137  """
138  self.real_points = bunch.list_get_hit_variable(self.weight_variables)
139  self.real_points = zip(*tuple(self.real_points))
140  self.real_points = numpy.array(self.real_points)
141  if self.voronoi_bound != None:
142  self.real_points, not_cut = \
143  self.voronoi_bound.cut_on_bound(self.real_points)
144  points = numpy.vstack((self.real_points,
145  self.voronoi_bound.bounding_points))
146  else:
147  points = self.real_points
148  not_cut = range(len(self.real_points))
149  self.tesselation = scipy.spatial.Voronoi(points)
150  self.tile_content_list = [None]*len(self.real_points)
151  for i, point in enumerate(self.real_points):
152  tile_index = self.tesselation.point_region[i]
153  region = self.tesselation.regions[tile_index]
154  if -1 in region or len(region) == 0: # points at infinity
155  self.tile_content_list[i] = 0.
156  else:
157  self.tile_content_list[i] = self._tile_content(i)
158  if global_cut:
159  weight = 'global_weight'
160  else:
161  weight = 'local_weight'
162  self.weight_list = [None]*len(bunch)
163  for i, hit_index in enumerate(not_cut):
164  hit = bunch[hit_index]
165  pdf_target = self.get_pdf(self.real_points[i])
166  self.weight_list[i] = pdf_target*self.tile_content_list[i]
167  hit[weight] = self.weight_list[i]
168  for i, hit in enumerate(bunch):
169  if i not in not_cut:
170  hit[weight] = 0.
171 
172  def get_pdf(self, point):
173  """
174  Calculate a multivariate gaussian at point.
175  - bunch: xboa.bunch.Bunch object. Evaluate weights for all Hits in bunch
176  - global_cut: if set to True, weighting will be applied to the hit's
177  global_weight. If set to False, weighting will be applied to the hit's
178  local_weight.
179  """
180  norm = (2.*math.pi)**numpy.shape(point)[0]*self.weight_ellipse_det
181  delta = point - self.weight_mean
182  delta_t = numpy.transpose(delta)
183  arg = -0.5*delta.dot(self.weight_ellipse_inv.dot(delta_t))
184  gauss = math.exp(arg)/norm**0.5
185  return gauss
186 
187  def plot_two_d_projection(self, projection_axes, fill_option = None,
188  lower = None, upper = None):
189  """
190  Plot a 2D projection of the voronoi cells.
191  - projection_axes: list of length two. Determines the projection that
192  will be drawn. Should be a subset of the weight_variables.
193  - fill_option: if set to None or 'none', tiles will not be filled. If
194  set to 'weight', 'content', 'pdf', tiles will be filled according to
195  the calculated point weight, calculated tile content or probability
196  density function for that point.
197  - lower: integer lower bound for slicing of the tesselation regions. If
198  set to None, defaults to 0.
199  - upper: integer upper bound for slicing of the tesselation regions. If
200  set to None, defaults to the end of the tesselation regions list.
201 
202  Requires ROOT to be installed.
203 
204  Returns a tuple of (canvas, histogram, point_graph, tile_graph_list)
205  where canvas is the ROOT TCanvas, histogram is the ROOT TH2D that makes
206  the graph axes, point_graph is the graph with the points plotted,
207  tile_graph_list is a list of graphs, one for each tile.
208  """
209  common.has_root()
210  if lower == None:
211  lower = 0
212  if upper == None:
213  upper = len(self.real_points)
214  if len(projection_axes) != 2:
215  raise ValueError("Expected projection axes of length 2")
216  if projection_axes[0] not in self.weight_variables or \
217  projection_axes[1] not in self.weight_variables:
218  raise ValueError("Projection axes "+str(projection_axes)+\
219  " should be a subset of weight_variables "+
220  str(weight_variables))
221  if self.tesselation == None:
222  raise ValueError("Need to apply_weights before tesselation can be"+
223  "plotted")
224  title = "tesselation_"+str(projection_axes[0])+"_"+\
225  str(projection_axes[1])
226  canvas = common.make_root_canvas(title)
227  proj = [self.weight_variables.index(x) for x in projection_axes]
228  x_list = [vertex[proj[0]] for vertex in self.tesselation.vertices]
229  y_list = [vertex[proj[1]] for vertex in self.tesselation.vertices]
230  points = numpy.transpose(self.real_points)
231  hist, points_graph = common.make_root_graph(title,
232  points[0][lower:upper],
233  projection_axes[0],
234  points[1][lower:upper],
235  projection_axes[1])
236  if fill_option != None and fill_option != 'none':
237  hist.SetTitle('Color by '+fill_option)
238  hist.Draw("colz")
239  points_graph.SetMarkerStyle(6)
240  tile_graphs = []
241  fill_colors = self._get_fill_colors(fill_option, lower, upper)
242  for point_index, tile_index in \
243  enumerate(self.tesselation.point_region[lower:upper]):
244  tile = self.tesselation.regions[tile_index]
245  if -1 in tile or len(tile) == 0: # points at infinity
246  continue
247  graph = ROOT.TGraph(len(tile)+1)
248  tile_graphs.append(graph)
249  for i, vertex_index in enumerate(tile):
250  graph.SetPoint(i,
251  x_list[vertex_index],
252  y_list[vertex_index])
253  graph.SetPoint(len(tile),
254  x_list[tile[0]],
255  y_list[tile[0]])
256  graph.SetFillColor(fill_colors[point_index])
257  graph.Draw('f')
258  graph.Draw('l')
259  points_graph.Draw('p')
260  canvas.Update()
261  common._common._hist_persistent += tile_graphs
262  return canvas, hist, points_graph, tile_graphs
263 
264  def _get_fill_colors(self, fill_option, lower, upper):
265  """Get fill colors for plot_two_d_projection"""
266  if fill_option == "weight":
267  fill_data = self.weight_list[lower:upper]
268  elif fill_option == "content":
269  fill_data = self.tile_content_list[lower:upper]
270  elif fill_option == "pdf":
271  fill_data = [self.get_pdf(point) for point in self.real_points]\
272  [lower:upper]
273  elif fill_option == "none" or fill_option == None:
274  return [0]*len(self.real_points) # always white
275  else:
276  raise ValueError("Did not recognise fill option "+str(fill_option)+\
277  "should be one of ['weight', 'content', 'pdf', 'none']")
278  n_colors = ROOT.TColor.GetNumberOfColors()
279  bounds = [min(fill_data)+i*(max(fill_data)-min(fill_data))/(n_colors-1)\
280  for i in range(n_colors)]
281  fill_colors = [bisect.bisect_left(bounds, fill) for fill in fill_data]
282  fill_colors = [ROOT.TColor.GetColorPalette(i) for i in fill_colors]
283  return fill_colors
284 
285  def _get_cm_matrix(self, simplex, triangulation, dimension):
286  elements = numpy.zeros([dimension+2, dimension+2])
287  for i in range(1, dimension+2):
288  elements[i, 0] = 1.
289  elements[0, i] = 1.
290  for i, vertex_i in enumerate(simplex):
291  for j, vertex_j in enumerate(simplex[i+1:]):
292  j += i+1
293  point_i = triangulation.points[vertex_i]
294  point_j = triangulation.points[vertex_j]
295  square = [(point_i[d] - point_j[d])**2 \
296  for d in xrange(dimension)]
297  length_sq = sum(square)
298  elements[i+1, j+1] = length_sq
299  elements[j+1, i+1] = length_sq
300  return elements
301 
302  def _tile_content(self, point_index):
303  """
304  Calculate the tile for the index at point_index
305 
306  Use Cayler-Menger determinant to calculate content of an arbitrary
307  dimensional simplex.
308  """
309  tile_index = self.tesselation.point_region[point_index]
310  region = self.tesselation.regions[tile_index]
311  points = numpy.array([self.tesselation.vertices[i] for i in region])
312  dimension = numpy.shape(points)[1]
313  content = 0.
314  triangulation = scipy.spatial.Delaunay(points)
315  coefficient = (-1)**(dimension+1)/2.**dimension
316  coefficient /= numpy.math.factorial(dimension)**2.
317  for simplex in triangulation.simplices:
318  elements = self._get_cm_matrix(simplex, triangulation, dimension)
319  det = numpy.linalg.det(elements)
320  if det != det: # underflow
321  det = 0.
322  this_content = (coefficient*det)**0.5
323  if this_content == this_content:
324  content += this_content
325  return content
326 
327 
def apply_weights
Apply a weighting to each Hit in bunch corresponding to where is the content of each Voronoi til...
def _tile_content
Calculate the tile for the index at point_index.
VoronoiWeighting class enables the user to set statistical weights of Hits in a Bunch based on how cl...
common module defines common utility data and functions that are used elsewhere
Definition: __init__.py:1
def _get_fill_colors
Get fill colors for plot_two_d_projection.
def get_pdf
Calculate a multivariate gaussian at point.
def plot_two_d_projection
Plot a 2D projection of the voronoi cells.