Source code for graphmuse.samplers

from .csamplers import *
import numpy
import torch
from .base_samplers import SubgraphMultiplicitySampler
from .sampler_functions import random_score_region_torch


def graph(edges):
    """
    Create a C ``Graph`` object from a numpy array of edges, with type checking.

    Parameters
    ----------
    edges : numpy.ndarray
        a 2D array of ``int32`` integers. The first row contains the source
        nodes, the second row the destination nodes, the third row the edge
        types.

    Returns
    -------
    Graph
        a Graph object in C

    Raises
    ------
    TypeError
        if ``edges.dtype`` is not ``numpy.int32``.
    ValueError
        if ``edges`` contains no elements, since no node count can be derived.
    """
    # TODO: support 64 bit integers
    if edges.dtype not in (numpy.int32,):
        raise TypeError(f"currently only {numpy.int32} nodes supported, not {edges.dtype}")

    if edges.size == 0:
        raise ValueError("cannot create a graph from an empty edge array")

    # The node count is one more than the largest node index found in either
    # the source or the destination row. Taking the maximum over both rows
    # (instead of reading the last destination entry) gives the same result for
    # destination-sorted input and stays correct when the row is not sorted.
    # NOTE(review): the C Graph itself may still expect a particular edge
    # ordering -- confirm against the extension.
    node_count = numpy.max(edges[:2]) + 1

    return Graph(edges, node_count)
def sample_nodewise(cgraph, depth, samples_per_node, targets):
    """
    Call the C extension function ``c_sample_nodewise`` and hand back torch tensors.

    Samples nodes within a score graph.

    Parameters
    ----------
    cgraph : Graph
        The score graph implemented in c. It is an attribute of the
        HeteroScoreGraph.
    depth : int
        The number of layers that are sampled
    samples_per_node : int
        The number of samples per node.
    targets : np.ndarray
        initial value for the sampling iteration

    Returns
    -------
    samples_per_layer : list of torch.Tensor
    edges_between_layers : list of torch.Tensor
    load_per_layer : list of torch.Tensor
    total_samples : torch.Tensor
        The four results of ``c_sample_nodewise``, in the order it returns
        them, each numpy array wrapped with ``torch.from_numpy``.
    """

    def to_tensors(arrays):
        # wrap every numpy array of a sequence without copying its data
        return [torch.from_numpy(array) for array in arrays]

    layers, layer_edges, layer_loads, union = c_sample_nodewise(
        cgraph, depth, samples_per_node, targets
    )

    # move to torch tensors
    return (
        to_tensors(layers),
        to_tensors(layer_edges),
        to_tensors(layer_loads),
        torch.from_numpy(union),
    )
def random_score_region(note_onsets, budget):
    """
    Python wrapper function for C Extension function c_random_score_region

    It samples a score region of a given budget from a score graph.

    Parameters
    ----------
    note_onsets : array/tensor int
        Onset of every note. A ``torch.Tensor`` (on any device, with or
        without gradient tracking), a numpy array or any sequence accepted by
        ``numpy.asarray``. Values are converted to ``int32``.
        NOTE(review): the feasibility check below treats the gaps between
        first occurrences of distinct values as group sizes, which presumes a
        non-decreasing array -- confirm with callers.
    budget : int
        The maximum number of nodes in the region

    Returns
    -------
    region : tuple
        The result of ``c_random_score_region``; described upstream as a tuple
        of two integers, start and end.

    Raises
    ------
    ValueError
        if ``note_onsets`` is empty, or if every run of equal onsets is longer
        than ``budget`` so that no region can fit.
    """
    if isinstance(note_onsets, torch.Tensor):
        # Tensor.numpy() only works for CPU tensors that do not require grad;
        # detach and move first so every tensor is accepted.
        note_onsets = note_onsets.detach().cpu().numpy()

    onsets = numpy.asarray(note_onsets).astype(numpy.int32)

    if onsets.size == 0:
        # without this guard the indexing below fails with an opaque IndexError
        raise ValueError("cannot sample a score region from an empty onset array")

    # index of the first occurrence of every distinct onset value
    _, unique_onset_indices = numpy.unique(onsets, return_index=True)
    unique_onset_indices = unique_onset_indices.astype(numpy.int32)

    # size of the last group of equal onsets, and sizes of all earlier groups
    last_group_size = len(onsets) - unique_onset_indices[-1]
    earlier_group_sizes = numpy.diff(unique_onset_indices)

    if last_group_size > budget and (earlier_group_sizes > budget).all():
        raise ValueError("impossible to sample a score region with the given budget within given note array")

    return c_random_score_region(onsets, unique_onset_indices, budget)
def extend_score_region_via_neighbor_sampling(cgraph, note_array, region, samples_per_node, sample_rightmost=True):
    """
    Python wrapper around the C extension function
    ``c_extend_score_region_via_neighbor_sampling``.

    Samples neighbours and pre-neighbours that lie directly outside the given
    score region.

    Parameters
    ----------
    cgraph : Graph
        Score graph implemented in C (attribute of :class:`HeteroScoreGraph`).
    note_array : partitura or numpy structured array
        Score representation; the ``onset_div`` and ``duration_div`` fields
        are read and converted to ``int32``.
    region : tuple[int, int]
        Inclusive start and exclusive end describing the region boundaries.
    samples_per_node : int
        Number of samples drawn per node.
    sample_rightmost : bool, optional
        Whether to compute the right extension, by default ``True``.

    Returns
    -------
    tuple
        ``((left_nodes, left_edges), (right_nodes, right_edges))`` as torch
        tensors; both edge tensors are cast to ``long``.

    Raises
    ------
    ValueError
        if ``sample_rightmost`` is not a ``bool``, if the region start is not
        smaller than the region end, or if the region is rejected as covering
        the entire score.
    """
    start, stop = region

    onset_div = note_array["onset_div"].astype(numpy.int32)
    duration_div = note_array["duration_div"].astype(numpy.int32)

    if not isinstance(sample_rightmost, bool):
        raise ValueError(f"non-bool object {sample_rightmost} passed in as sample_rightmost parameter")

    if start >= stop:
        raise ValueError("invalid region given")

    # NOTE(review): the region end is documented as exclusive, yet this guard
    # compares it with the last index rather than the number of notes --
    # confirm against the C routine which of the two is intended.
    last_index = len(onset_div) - 1
    if start == 0 and stop == last_index:
        raise ValueError("can't extend score region if the region covers the entire score")

    # running maximum of the note end times, required by the C routine
    end_times = onset_div + duration_div
    running_max_end = numpy.maximum.accumulate(end_times)

    left, right = c_extend_score_region_via_neighbor_sampling(
        cgraph,
        onset_div,
        duration_div,
        running_max_end,
        start,
        stop,
        samples_per_node,
        sample_rightmost,
    )
    left_nodes, left_edges = left
    right_nodes, right_edges = right

    # move to torch tensors
    left_extension = (torch.from_numpy(left_nodes), torch.from_numpy(left_edges).long())
    right_extension = (torch.from_numpy(right_nodes), torch.from_numpy(right_edges).long())
    return left_extension, right_extension
def sample_neighbors_in_score_graph(note_array, depth, samples_per_node, targets):
    """
    Call the C extension function ``c_sample_neighbors_in_score_graph`` and
    return its results as torch tensors.

    Samples neighbors within a score graph. Unlike the methods that work on
    pre-neighbors, no lookup table for the neighborhood of a node is used here:
    the neighborhood is computed on the fly, which is efficient because of the
    form neighborhoods have in score graphs.

    Parameters
    ----------
    note_array : partitura/numpy.structured array
        Represents the score graph; its data determines the edges between
        nodes. Required fields: ``onset_div`` (a non-decreasing integer array)
        and ``duration_div`` (an integer array). Both are converted to
        ``int32`` before being handed to the C function.
    depth : int
        The number of layers that are sampled
    samples_per_node : int
        The number of samples per node.
    targets : np.ndarray
        initial value for the sampling iteration

    Returns
    -------
    samples_per_layer : list of torch.Tensor
        Node layers; per the C function's contract the last layer corresponds
        to ``targets`` and every earlier layer is a subset of the
        pre-neighborhood of the layer after it.
    edges_between_layers : list of torch.Tensor
        ``long`` edge tensors showing how consecutive layers are connected.
    total_samples : torch.Tensor
        the union of samples_per_layer

    When ``targets`` is empty, two empty lists and an empty ``long`` tensor are
    returned without calling the C function.
    """
    if len(targets) == 0:
        # nothing to sample from
        no_samples = torch.empty(0, dtype=torch.long)
        return [], [], no_samples

    onset_div = note_array["onset_div"].astype(numpy.int32)
    duration_div = note_array["duration_div"].astype(numpy.int32)

    layers, layer_edges, union = c_sample_neighbors_in_score_graph(
        onset_div, duration_div, depth, samples_per_node, targets
    )

    # move to torch tensors
    layer_tensors = list(map(torch.from_numpy, layers))
    edge_tensors = [torch.from_numpy(pairs).long() for pairs in layer_edges]
    union_tensor = torch.from_numpy(union)

    return layer_tensors, edge_tensors, union_tensor
def sample_preneighbors_within_region(cgraph, region, samples_per_node=10):
    """
    Call the C extension function ``c_sample_preneighbors_within_region`` and
    return its results as torch tensors.

    Samples the pre-neighbors (or predecessors) within a score region.

    Parameters
    ----------
    cgraph : Graph
        The score graph implemented in c. It is an attribute of the
        HeteroScoreGraph.
    region : tuple
        The region to sample from, a tuple of two integers ``(start, end)``;
        inclusive on the left and exclusive on the right. It is unpacked and
        passed to the C function as two separate integers.
    samples_per_node : int
        The number of samples per node.

    Returns
    -------
    samples : torch.Tensor
        The sampled nodes, a 1D tensor of integers. It might not contain all
        nodes in the region.
    edges : torch.Tensor (2, num_edges)
        The edges, cast to ``long``. The first row contains the source nodes,
        the second row the destination nodes.

    Raises
    ------
    ValueError
        if the region start is not smaller than the region end.
    """
    start, stop = region

    if start >= stop:
        raise ValueError("invalid region given")

    sampled_nodes, edge_pairs = c_sample_preneighbors_within_region(
        cgraph, start, stop, samples_per_node
    )

    # move to torch tensors
    return torch.from_numpy(sampled_nodes), torch.from_numpy(edge_pairs).long()