@article{cardoso-2022-rtpercept,
  title = "Training and Predicting Visual Error for Real-Time Applications",
  author = "Joao Afonso Cardoso and Bernhard Kerbl and Lei Yang and Yury Uralsky and Michael Wimmer",
  year = "2022",
  abstract = "Visual error metrics play a fundamental role in the quantification of perceived image similarity. Most recently, use cases for them in real-time applications have emerged, such as content-adaptive shading and shading reuse to increase performance and improve efficiency. A wide range of different metrics has been established, with the most sophisticated being capable of capturing the perceptual characteristics of the human visual system. However, their complexity, computational expense, and reliance on reference images to compare against prevent their generalized use in real-time, restricting such applications to using only the simplest available metrics. In this work, we explore the abilities of convolutional neural networks to predict a variety of visual metrics without requiring either reference or rendered images. Specifically, we train and deploy a neural network to estimate the visual error resulting from reusing shading or using reduced shading rates. The resulting models account for 70%--90% of the variance while achieving up to an order of magnitude faster computation times. Our solution combines image-space information that is readily available in most state-of-the-art deferred shading pipelines with reprojection from previous frames to enable an adequate estimate of visual errors, even in previously unseen regions. We describe a suitable convolutional network architecture and considerations for data preparation for training. We demonstrate the capability of our network to predict complex error metrics at interactive rates in a real-time application that implements content-adaptive shading in a deferred pipeline. Depending on the portion of unseen image regions, our approach can achieve up to 2x performance compared to state-of-the-art methods.",
  month = may,
  journal = "Proceedings of the ACM on Computer Graphics and Interactive Techniques",
  volume = "5",
  number = "1",
  issn = "2577-6193",
  doi = "10.1145/3522625",
  publisher = "Association for Computing Machinery",
  pages = "1--17",
  keywords = "perceptual error, variable rate shading, real-time",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2022/cardoso-2022-rtpercept/",
}

@inproceedings{celarek-2022-gmcn,
  title = "Gaussian Mixture Convolution Networks",
  author = "Adam Celarek and Pedro Hermosilla-Casajus and Bernhard Kerbl and Timo Ropinski and Michael Wimmer",
  year = "2022",
  abstract = "This paper proposes a novel method for deep learning based on the analytical convolution of multidimensional Gaussian mixtures. In contrast to tensors, these do not suffer from the curse of dimensionality and allow for a compact representation, as data is only stored where details exist. Convolution kernels and data are Gaussian mixtures with unconstrained weights, positions, and covariance matrices. Similar to discrete convolutional networks, each convolution step produces several feature channels, represented by independent Gaussian mixtures. Since traditional transfer functions like ReLUs do not produce Gaussian mixtures, we propose using a fitting of these functions instead. This fitting step also acts as a pooling layer if the number of Gaussian components is reduced appropriately. We demonstrate that networks based on this architecture reach competitive accuracy on Gaussian mixtures fitted to the MNIST and ModelNet data sets.",
  month = apr,
  publisher = "OpenReview.org",
  event = "ICLR | 2022",
  booktitle = "The Tenth International Conference on Learning Representations (ICLR 2022)",
  pages = "1--23",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2022/celarek-2022-gmcn/",
}
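Note on celarek-2022-gmcn: the "analytical convolution" rests on the standard identity that the convolution of two Gaussians is again a Gaussian, so mixtures convolve term by term. In the sketch below the notation is ours, not necessarily the paper's: for data $f(x) = \sum_i a_i\, \mathcal{N}(x; \mu_i, \Sigma_i)$ and kernel $k(x) = \sum_j b_j\, \mathcal{N}(x; \nu_j, \Lambda_j)$,

  \[
    (f \ast k)(x) \;=\; \sum_i \sum_j a_i\, b_j\;
      \mathcal{N}\!\bigl(x;\; \mu_i + \nu_j,\; \Sigma_i + \Lambda_j\bigr).
  \]

Each output channel thus remains a Gaussian mixture, at the cost of multiplicative growth in the number of components, which is presumably one reason the paper's function-fitting step doubles as a pooling layer when it reduces the component count.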
@mastersthesis{FRAISS-2022-CGMM,
  title = "Construction and Visualization of Gaussian Mixture Models from Point Clouds for 3D Object Representation",
  author = "Simon Maximilian Fraiss",
  year = "2022",
  abstract = "Point clouds are a common representation of three-dimensional shapes in computer graphics and 3D-data processing. However, in some applications, other representations are more useful. Gaussian Mixture Models (GMMs) can be used as such an alternative representation. A GMM is a convex sum of normal distributions, which aims to describe a point cloud’s density. In this thesis, we investigate both the visualization and construction of GMMs. For visualization, we have implemented a tool that enables both isoellipsoid and density visualization of GMMs. We describe the mathematical background, the algorithms, and our implementation of this tool. Regarding GMM construction, we investigate several algorithms used in previous papers for constructing GMMs for 3D-data processing tasks. We present our implementations of the expectation-maximization (EM) algorithm and top-down HEM. Additionally, we have adapted the implementation of geometrically regularized bottom-up HEM to produce a fixed number of Gaussians. We evaluate these three algorithms in terms of the quality of their generated GMMs. In many cases, the statistical likelihood, which is maximized by the EM algorithm, is not a reliable indicator of a GMM’s quality. Therefore, we instead rely on the reconstruction error of a reconstructed point cloud based on the Chamfer distance. Additionally, we provide metrics for measuring the reconstructed point cloud’s uniformity and the GMM’s variation of Gaussians. We demonstrate that EM provides the best results in terms of these metrics. Top-down HEM is a fast alternative and can produce even better results when using fewer input points. The results of geometrically regularized bottom-up HEM are inferior for lower numbers of Gaussians, but it can create good GMMs consisting of high numbers of Gaussians very efficiently.",
  month = mar,
  address = "Favoritenstrasse 9-11/E193-02, A-1040 Vienna, Austria",
  school = "Research Unit of Computer Graphics, Institute of Visual Computing and Human-Centered Technology, Faculty of Informatics, TU Wien",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2022/FRAISS-2022-CGMM/",
}
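Note on FRAISS-2022-CGMM: the evaluation compares construction algorithms by the Chamfer distance between the input cloud and a point cloud reconstructed from the GMM (presumably by sampling it). A minimal Python sketch of the symmetric Chamfer distance; conventions vary between papers (squared vs. unsquared distances, sum vs. mean), so the thesis's exact variant may differ:

  import numpy as np

  def chamfer_distance(a: np.ndarray, b: np.ndarray) -> float:
      # a: (N, 3), b: (M, 3); brute-force O(N*M) pairwise distances.
      # For large clouds, replace with a k-d tree query (scipy.spatial.cKDTree).
      d = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=-1)
      return d.min(axis=1).mean() + d.min(axis=0).mean()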
@inproceedings{cardoso-2021-cost,
  title = "Cost Volume Refinement for Depth Prediction",
  author = "Joao Afonso Cardoso and Nuno Goncalves and Michael Wimmer",
  year = "2021",
  abstract = "Light-field cameras are becoming more popular in the consumer market. Their data redundancy makes it possible, in theory, to accurately refocus images after acquisition and to predict the depth of each point visible from the camera. Combined, these two features allow for the generation of full-focus images, which is impossible in traditional cameras. Multiple methods for depth prediction from light fields (or stereo) have been proposed over the years. A large subset of these methods relies on cost-volume estimates – 3D objects where each layer represents a heuristic of whether each point in the image is at a certain distance from the camera. Generally, this volume is used to regress a depth map, which is then refined for better results. In this paper, we argue that refining the cost volumes is superior to refining the depth maps in order to further increase the accuracy of depth predictions. We propose a set of cost-volume refinement algorithms and show their effectiveness.",
  month = jan,
  isbn = "978-1-7281-8809-6",
  publisher = "IEEE",
  location = "Milan, Italy",
  event = "25th International Conference on Pattern Recognition (ICPR)",
  doi = "10.1109/ICPR48806.2021.9412730",
  booktitle = "Proceedings of the 25th International Conference on Pattern Recognition",
  pages = "354--361",
  keywords = "depth reconstruction, light fields, cost volumes",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2021/cardoso-2021-cost/",
}
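Note on cardoso-2021-cost: a cost volume is a (D, H, W) stack in which slice d scores how well each pixel matches depth hypothesis d. One common way to "regress a depth map" from it is the soft-argmin used by GC-Net-style stereo methods; the abstract does not say which regression the paper uses, so the NumPy sketch below is illustrative only:

  import numpy as np

  def regress_depth(cost_volume: np.ndarray, depths: np.ndarray) -> np.ndarray:
      # cost_volume: (D, H, W), lower cost = better match; depths: (D,) hypothesis values.
      p = np.exp(-cost_volume)                  # costs -> unnormalized likelihoods
      p /= p.sum(axis=0, keepdims=True)         # per-pixel softmax over the D hypotheses
      return np.einsum("d,dhw->hw", depths, p)  # expected depth per pixel, shape (H, W)

The paper's argument then amounts to operating on the volume before this reduction rather than filtering the (H, W) depth map after it.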
@bachelorsthesis{hanko-2019-ani,
  title = "Higher Hand-Drawn Detail Quality using Convolutional Assistant",
  author = "Dominik Hanko",
  year = "2020",
  abstract = "Research on the use of neural networks to help artists or advance 2D animation is very underdeveloped. Most of the little research that exists does not ask questions that are relevant for animators, but is done in a pure research mindset. We, however, tried to find a problem that would actually be relevant in the animation industry and came up with the idea of enhancing the quality of poorly drawn features in 2D animation. The basis for this idea is that, as a cost- and time-saving measure, features in 2D animation are often drawn at different levels of detail, depending on the current focus of the scene and other factors. The focus lies on the enhancement of characters’ eyes, with the idea that other features could be handled in a similar way in future work. To achieve this quality enhancement, we train the FUNIT network on a manually created dataset consisting of crops of eyes from different characters at different quality levels, with the goal that it will be able to consistently transform low-quality eye images into high-quality eye images.",
  month = apr,
  address = "Favoritenstrasse 9-11/E193-02, A-1040 Vienna, Austria",
  school = "Research Unit of Computer Graphics, Institute of Visual Computing and Human-Centered Technology, Faculty of Informatics, TU Wien",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2020/hanko-2019-ani/",
}

@bachelorsthesis{wieser-2019-ani,
  title = "Classification of Production Ready 2D Animation using Contour and Distance Fields",
  author = "Manuel Wieser",
  year = "2019",
  abstract = "Image classification is one of the most common use cases of Convolutional Neural Networks. In this thesis, our goal is to increase the accuracy of a neural network classifier for frames of production-ready 2D animations and to create a high-accuracy classification model from a dataset. This can be seen as groundwork for future work that applies neural networks to production-ready 2D animation data, by reusing and tweaking the model for different applications. We compare training a neural network with the color channels of images to training with grayscale images, predicted contours, or distance fields generated from those contours. Furthermore, different combinations of the data are used to evaluate the best option. This means that the accuracy comparison includes not only color data versus color with contours and distance fields, but every combination of the four aforementioned types of input.",
  month = dec,
  address = "Favoritenstrasse 9-11/E193-02, A-1040 Vienna, Austria",
  school = "Research Unit of Computer Graphics, Institute of Visual Computing and Human-Centered Technology, Faculty of Informatics, TU Wien",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2019/wieser-2019-ani/",
}

@article{celarek_adam-2019-qelta,
  title = "Quantifying the Error of Light Transport Algorithms",
  author = "Adam Celarek and Wenzel Jakob and Michael Wimmer and Jaakko Lehtinen",
  year = "2019",
  abstract = "This paper proposes a new methodology for measuring the error of unbiased physically based rendering algorithms. The current state of the art includes mean squared error (MSE) based metrics and visual comparisons of equal-time renderings of competing algorithms. Neither is satisfying, as MSE does not describe behavior and can exhibit significant variance, and visual comparisons are inherently subjective. Our contribution is two-fold: First, we propose to compute many short renderings instead of a single long run and use the short renderings to estimate MSE expectation and variance as well as per-pixel standard deviation. An algorithm that achieves good results in most runs, but with occasional outliers, is essentially unreliable, which we wish to quantify numerically. We use per-pixel standard deviation to identify problematic lighting effects of rendering algorithms. The second contribution is the error spectrum ensemble (ESE), a tool for measuring the distribution of error over frequencies. The ESE serves two purposes: It reveals correlation between pixels and can be used to detect outliers, which offset the amount of error substantially.",
  month = jul,
  journal = "Computer Graphics Forum",
  volume = "38",
  number = "4",
  doi = "10.1111/cgf.13775",
  publisher = "The Eurographics Association and John Wiley & Sons Ltd.",
  pages = "111--121",
  keywords = "measuring error, light transport, global illumination",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2019/celarek_adam-2019-qelta/",
}
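Note on celarek_adam-2019-qelta: the first contribution is simple to reproduce: render K short, independently seeded runs, compute each run's MSE against a converged reference, and report sample statistics. A minimal NumPy sketch (variable names ours); the error spectrum ensemble additionally needs a frequency decomposition of the per-run error images, whose exact construction is not given in the abstract:

  import numpy as np

  def mse_statistics(renders: np.ndarray, reference: np.ndarray):
      # renders: (K, H, W) or (K, H, W, C) short runs; reference: matching converged image.
      per_run_mse = ((renders - reference) ** 2).mean(axis=tuple(range(1, renders.ndim)))
      mse_mean = per_run_mse.mean()             # estimate of MSE expectation
      mse_var = per_run_mse.var(ddof=1)         # run-to-run variance; flags unreliable algorithms
      pixel_std = renders.std(axis=0, ddof=1)   # per-pixel standard deviation across runs
      return mse_mean, mse_var, pixel_std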