@article{cardoso-2022-rtpercept,
  title = "Training and Predicting Visual Error for Real-Time Applications",
  author = "Joao Afonso Cardoso and Bernhard Kerbl and Lei Yang and Yury Uralsky and Michael Wimmer",
  year = "2022",
  abstract = "Visual error metrics play a fundamental role in the quantification of perceived image similarity. Most recently, use cases for them in real-time applications have emerged, such as content-adaptive shading and shading reuse to increase performance and improve efficiency. A wide range of different metrics has been established, with the most sophisticated being capable of capturing the perceptual characteristics of the human visual system. However, their complexity, computational expense, and reliance on reference images to compare against prevent their generalized use in real-time, restricting such applications to using only the simplest available metrics. In this work, we explore the abilities of convolutional neural networks to predict a variety of visual metrics without requiring either reference or rendered images. Specifically, we train and deploy a neural network to estimate the visual error resulting from reusing shading or using reduced shading rates. The resulting models account for 70%--90% of the variance while achieving up to an order of magnitude faster computation times. Our solution combines image-space information that is readily available in most state-of-the-art deferred shading pipelines with reprojection from previous frames to enable an adequate estimate of visual errors, even in previously unseen regions. We describe a suitable convolutional network architecture and considerations for data preparation for training. We demonstrate the capability of our network to predict complex error metrics at interactive rates in a real-time application that implements content-adaptive shading in a deferred pipeline. Depending on the portion of unseen image regions, our approach can achieve up to 2x performance compared to state-of-the-art methods.",
  month = may,
  journal = "Proceedings of the ACM on Computer Graphics and Interactive Techniques",
  volume = "5",
  number = "1",
  issn = "2577-6193",
  doi = "10.1145/3522625",
  publisher = "Association for Computing Machinery",
  pages = "1--17",
  keywords = "perceptual error, variable rate shading, real-time",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2022/cardoso-2022-rtpercept/",
}

@inproceedings{celarek-2022-gmcn,
  title = "Gaussian Mixture Convolution Networks",
  author = "Adam Celarek and Pedro Hermosilla-Casajus and Bernhard Kerbl and Timo Ropinski and Michael Wimmer",
  year = "2022",
  abstract = "This paper proposes a novel method for deep learning based on the analytical convolution of multidimensional Gaussian mixtures. In contrast to tensors, these do not suffer from the curse of dimensionality and allow for a compact representation, as data is only stored where details exist. Convolution kernels and data are Gaussian mixtures with unconstrained weights, positions, and covariance matrices. Similar to discrete convolutional networks, each convolution step produces several feature channels, represented by independent Gaussian mixtures. Since traditional transfer functions like ReLUs do not produce Gaussian mixtures, we propose using a fitting of these functions instead. This fitting step also acts as a pooling layer if the number of Gaussian components is reduced appropriately. We demonstrate that networks based on this architecture reach competitive accuracy on Gaussian mixtures fitted to the MNIST and ModelNet data sets.",
  month = apr,
  publisher = "OpenReview.org",
  event = "ICLR | 2022",
  booktitle = "The Tenth International Conference on Learning Representations (ICLR 2022)",
  pages = "1--23",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2022/celarek-2022-gmcn/",
}
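Note on celarek-2022-gmcn: the "analytical convolution" rests on the standard identity that the convolution of two Gaussians is again a Gaussian, so mixtures convolve term by term. In the sketch below the notation is ours, not necessarily the paper's: for data $f(x) = \sum_i a_i\, \mathcal{N}(x; \mu_i, \Sigma_i)$ and kernel $k(x) = \sum_j b_j\, \mathcal{N}(x; \nu_j, \Lambda_j)$,

  \[
    (f \ast k)(x) \;=\; \sum_i \sum_j a_i\, b_j\;
      \mathcal{N}\!\bigl(x;\; \mu_i + \nu_j,\; \Sigma_i + \Lambda_j\bigr).
  \]

Each output channel thus remains a Gaussian mixture, at the cost of multiplicative growth in the number of components, which is presumably one reason the paper's function-fitting step doubles as a pooling layer when it reduces the component count.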
@mastersthesis{FRAISS-2022-CGMM,
  title = "Construction and Visualization of Gaussian Mixture Models from Point Clouds for 3D Object Representation",
  author = "Simon Maximilian Fraiss",
  year = "2022",
  abstract = "Point clouds are a common representation of three-dimensional shapes in computer graphics and 3D-data processing. However, in some applications, other representations are more useful. Gaussian Mixture Models (GMMs) can be used as such an alternative representation. A GMM is a convex sum of normal distributions, which aims to describe a point cloud’s density. In this thesis, we investigate both the visualization and construction of GMMs. For visualization, we have implemented a tool that enables both isoellipsoid and density visualization of GMMs. We describe the mathematical background, the algorithms, and our implementation of this tool. Regarding GMM construction, we investigate several algorithms used in previous papers for constructing GMMs for 3D-data processing tasks. We present our implementations of the expectation-maximization (EM) algorithm and top-down HEM. Additionally, we have adapted the implementation of geometrically regularized bottom-up HEM to produce a fixed number of Gaussians. We evaluate these three algorithms in terms of the quality of their generated GMMs. In many cases, the statistical likelihood, which is maximized by the EM algorithm, is not a reliable indicator of a GMM’s quality. Therefore, we instead rely on the reconstruction error of a reconstructed point cloud based on the Chamfer distance. Additionally, we provide metrics for measuring the reconstructed point cloud’s uniformity and the GMM’s variation of Gaussians. We demonstrate that EM provides the best results in terms of these metrics. Top-down HEM is a fast alternative and can produce even better results when using fewer input points. The results of geometrically regularized bottom-up HEM are inferior for lower numbers of Gaussians, but it can create good GMMs consisting of high numbers of Gaussians very efficiently.",
  month = mar,
  address = "Favoritenstrasse 9-11/E193-02, A-1040 Vienna, Austria",
  school = "Research Unit of Computer Graphics, Institute of Visual Computing and Human-Centered Technology, Faculty of Informatics, TU Wien",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2022/FRAISS-2022-CGMM/",
}
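Note on FRAISS-2022-CGMM: the evaluation compares construction algorithms by the Chamfer distance between the input cloud and a point cloud reconstructed from the GMM (presumably by sampling it). A minimal Python sketch of the symmetric Chamfer distance; conventions vary between papers (squared vs. unsquared distances, sum vs. mean), so the thesis's exact variant may differ:

  import numpy as np

  def chamfer_distance(a: np.ndarray, b: np.ndarray) -> float:
      # a: (N, 3), b: (M, 3); brute-force O(N*M) pairwise distances.
      # For large clouds, replace with a k-d tree query (scipy.spatial.cKDTree).
      d = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=-1)
      return d.min(axis=1).mean() + d.min(axis=0).mean()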
@inproceedings{cardoso-2021-cost,
  title = "Cost Volume Refinement for Depth Prediction",
  author = "Joao Afonso Cardoso and Nuno Goncalves and Michael Wimmer",
  year = "2021",
  abstract = "Light-field cameras are becoming more popular in the consumer market. Their data redundancy makes it possible, in theory, to accurately refocus images after acquisition and to predict the depth of each point visible from the camera. Combined, these two features allow for the generation of full-focus images, which is impossible in traditional cameras. Multiple methods for depth prediction from light fields (or stereo) have been proposed over the years. A large subset of these methods relies on cost-volume estimates – 3D objects where each layer represents a heuristic of whether each point in the image is at a certain distance from the camera. Generally, this volume is used to regress a depth map, which is then refined for better results. In this paper, we argue that refining the cost volumes is superior to refining the depth maps in order to further increase the accuracy of depth predictions. We propose a set of cost-volume refinement algorithms and show their effectiveness.",
  month = jan,
  isbn = "978-1-7281-8809-6",
  publisher = "IEEE",
  location = "Milan, Italy",
  event = "25th International Conference on Pattern Recognition (ICPR)",
  doi = "10.1109/ICPR48806.2021.9412730",
  booktitle = "Proceedings of the 25th International Conference on Pattern Recognition",
  pages = "354--361",
  keywords = "depth reconstruction, light fields, cost volumes",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2021/cardoso-2021-cost/",
}
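Note on cardoso-2021-cost: a cost volume is a (D, H, W) stack in which slice d scores how well each pixel matches depth hypothesis d. One common way to "regress a depth map" from it is the soft-argmin used by GC-Net-style stereo methods; the abstract does not say which regression the paper uses, so the NumPy sketch below is illustrative only:

  import numpy as np

  def regress_depth(cost_volume: np.ndarray, depths: np.ndarray) -> np.ndarray:
      # cost_volume: (D, H, W), lower cost = better match; depths: (D,) hypothesis values.
      p = np.exp(-cost_volume)                  # costs -> unnormalized likelihoods
      p /= p.sum(axis=0, keepdims=True)         # per-pixel softmax over the D hypotheses
      return np.einsum("d,dhw->hw", depths, p)  # expected depth per pixel, shape (H, W)

The paper's argument then amounts to operating on the volume before this reduction rather than filtering the (H, W) depth map after it.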
@bachelorsthesis{hanko-2019-ani,
  title = "Higher Hand-Drawn Detail Quality using Convolutional Assistant",
  author = "Dominik Hanko",
  year = "2020",
  abstract = "Research on the use of neural networks to help artists or advance 2D animation is very underdeveloped. Most of the little research that exists does not ask questions that are relevant for animators, but is done in a pure research mindset. We, however, tried to find a problem that would actually be relevant in the animation industry and came up with the idea of enhancing the quality of poorly drawn features in 2D animation. The basis for this idea is that, as a cost- and time-saving measure, features in 2D animation are often drawn at different levels of detail, depending on the current focus of the scene and other factors. The focus lies on the enhancement of characters’ eyes, with the idea that other features could be handled in a similar way in future work. To achieve this quality enhancement, we train the FUNIT network on a manually created dataset consisting of crops of eyes from different characters at different quality levels, with the goal that it will be able to consistently transform low-quality eye images into high-quality eye images.",
  month = apr,
  address = "Favoritenstrasse 9-11/E193-02, A-1040 Vienna, Austria",
  school = "Research Unit of Computer Graphics, Institute of Visual Computing and Human-Centered Technology, Faculty of Informatics, TU Wien",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2020/hanko-2019-ani/",
}

@bachelorsthesis{wieser-2019-ani,
  title = "Classification of Production Ready 2D Animation using Contour and Distance Fields",
  author = "Manuel Wieser",
  year = "2019",
  abstract = "Image classification is one of the most common use cases of Convolutional Neural Networks. In this thesis, our goal is to increase the accuracy of a neural network classifier for frames of production-ready 2D animations and to create a high-accuracy classification model from a dataset. This can be seen as groundwork for future work that applies neural networks to production-ready 2D animation data, by reusing and tweaking the model for different applications. We compare training a neural network with the color channels of images to training with grayscale images, predicted contours, or distance fields generated from those contours. Furthermore, different combinations of the data are used to evaluate the best option. This means that the accuracy comparison includes not only color data versus color with contours and distance fields, but every combination of the four aforementioned types of input.",
  month = dec,
  address = "Favoritenstrasse 9-11/E193-02, A-1040 Vienna, Austria",
  school = "Research Unit of Computer Graphics, Institute of Visual Computing and Human-Centered Technology, Faculty of Informatics, TU Wien",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2019/wieser-2019-ani/",
}

@article{celarek_adam-2019-qelta,
  title = "Quantifying the Error of Light Transport Algorithms",
  author = "Adam Celarek and Wenzel Jakob and Michael Wimmer and Jaakko Lehtinen",
  year = "2019",
  abstract = "This paper proposes a new methodology for measuring the error of unbiased physically based rendering algorithms. The current state of the art includes mean squared error (MSE) based metrics and visual comparisons of equal-time renderings of competing algorithms. Neither is satisfying, as MSE does not describe behavior and can exhibit significant variance, and visual comparisons are inherently subjective. Our contribution is two-fold: First, we propose to compute many short renderings instead of a single long run and use the short renderings to estimate MSE expectation and variance as well as per-pixel standard deviation. An algorithm that achieves good results in most runs, but with occasional outliers, is essentially unreliable, which we wish to quantify numerically. We use per-pixel standard deviation to identify problematic lighting effects of rendering algorithms. The second contribution is the error spectrum ensemble (ESE), a tool for measuring the distribution of error over frequencies. The ESE serves two purposes: It reveals correlation between pixels and can be used to detect outliers, which offset the amount of error substantially.",
  month = jul,
  journal = "Computer Graphics Forum",
  volume = "38",
  number = "4",
  doi = "10.1111/cgf.13775",
  publisher = "The Eurographics Association and John Wiley & Sons Ltd.",
  pages = "111--121",
  keywords = "measuring error, light transport, global illumination",
  URL = "https://www.cg.tuwien.ac.at/research/publications/2019/celarek_adam-2019-qelta/",
}
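Note on celarek_adam-2019-qelta: the first contribution is simple to reproduce: render K short, independently seeded runs, compute each run's MSE against a converged reference, and report sample statistics. A minimal NumPy sketch (variable names ours); the error spectrum ensemble additionally needs a frequency decomposition of the per-run error images, whose exact construction is not given in the abstract:

  import numpy as np

  def mse_statistics(renders: np.ndarray, reference: np.ndarray):
      # renders: (K, H, W) or (K, H, W, C) short runs; reference: matching converged image.
      per_run_mse = ((renders - reference) ** 2).mean(axis=tuple(range(1, renders.ndim)))
      mse_mean = per_run_mse.mean()             # estimate of MSE expectation
      mse_var = per_run_mse.var(ddof=1)         # run-to-run variance; flags unreliable algorithms
      pixel_std = renders.std(axis=0, ddof=1)   # per-pixel standard deviation across runs
      return mse_mean, mse_var, pixel_std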