diff --git a/jitify.hpp b/jitify.hpp
index bc8fa14..ed06247 100644
--- a/jitify.hpp
+++ b/jitify.hpp
@@ -94,6 +94,10 @@
 #include <stdint.h>
 #include <dlfcn.h>
 #include <memory>
+#if CUDA_VERSION >= 9000
+#include <cooperative_groups.h>
+#endif
+
 #if __cplusplus >= 201103L
   #define JITIFY_UNIQUE_PTR std::unique_ptr
   #define JITIFY_DEFINE_AUTO_PTR_COPY_WAR(cls)
@@ -179,7 +183,7 @@ class ObjectCache {
 	object_map _objects;
 	key_rank   _ranked_keys;
 	size_t     _capacity;
-	
+
 	inline void discard_old(size_t n=0) {
 		if( n > _capacity ) {
 			throw std::runtime_error("Insufficient capacity in cache");
@@ -250,6 +254,33 @@ class vector : public std::vector<T> {
 #endif
 };
 
+//Helper function to cache sm count used by coop_groups
+  int get_device_SMs(int device_id){
+    static bool init=false;
+    static vector<int> SMs;
+    if(!init){
+      int nDevices;
+      cudaGetDeviceCount(&nDevices);
+      SMs.resize(nDevices);
+      for (int i = 0; i < nDevices; i++) {
+        cudaDeviceProp prop;
+        cudaGetDeviceProperties(&prop, i);
+        SMs[i] = prop.multiProcessorCount;
+      }
+    }
+    init = true;
+    if(device_id >= (int) SMs.size())
+      throw std::runtime_error(std::string("Error: Tried to query device of ordinal > number of devices.\n"));
+    return SMs[device_id];
+  }
+
+  int get_cur_dev_SMs(){
+    int dev;
+    cudaGetDevice(&dev);
+    return get_device_SMs(dev);
+  }
+
+
 // Helper functions for parsing/manipulating source code
 
 std::string replace_characters(std::string str, std::string const& oldchars, char newchar) {
@@ -374,14 +405,14 @@ inline bool extract_include_info_from_compile_error(std::string  log,
 	beg += pattern.size();
 	size_t end = log.find("\"", beg);
 	name = log.substr(beg, end-beg);
-	
+
 	size_t line_beg = log.rfind("\n", beg);
 	if( line_beg == std::string::npos ) {
 		line_beg = 0;
 	} else {
 		line_beg += 1;
 	}
-	
+
 	size_t split = log.find("(", line_beg);
 	parent = log.substr(line_beg, split-line_beg);
 	line_num = atoi(log.substr(split+1,
@@ -513,11 +544,11 @@ inline bool load_source(std::string                        filename,
 	bool remove_next_blank_line = false;
 	while( std::getline(*source_stream, line) ) {
 		++linenum;
-		
+
 		// HACK WAR for static variables not allowed on the device (unless __shared__)
 		// TODO: This breaks static member variables
 		//line = replace_token(line, "static const", "/*static*/ const");
-		
+
 		// TODO: Need to watch out for /* */ comments too
 		std::string cleanline = line.substr(0, line.find("//")); // Strip line comments
 		//if( cleanline.back() == "\r" ) { // Remove Windows line ending
@@ -538,7 +569,7 @@ inline bool load_source(std::string                        filename,
 			//line = "//" + line; // Comment out the #pragma once line
 			continue;
 		}
-		
+
 		// HACK WAR for Thrust using "#define FOO #pragma bar"
 		size_t pragma_beg = cleanline.find("#pragma ");
 		if( pragma_beg != std::string::npos ) {
@@ -551,13 +582,14 @@ inline bool load_source(std::string                        filename,
 				line += " " + pragma_split[2];
 			}
 		}
-		
+
 		source += line + "\n";
 	}
 	// HACK TESTING (WAR for cub)
+
 	//source = "#define cudaDeviceSynchronize() cudaSuccess\n" + source;
 	////source = "cudaError_t cudaDeviceSynchronize() { return cudaSuccess; }\n" + source;
-	
+
 	// WAR for #pragma once causing problems when there are multiple inclusions
 	//   of the same header from different paths.
 	if( pragma_once ) {
@@ -789,7 +821,7 @@ class CUDAKernel {
 	std::string               _func_name;
 	std::string               _ptx;
 	std::vector<CUjit_option> _opts;
-	
+
 	inline void cuda_safe_call(CUresult res) {
 		if( res != CUDA_SUCCESS ) {
 			const char* msg;
@@ -892,7 +924,7 @@ class CUDAKernel {
 		this->destroy_module();
 	}
 	inline operator CUfunction() const { return _kernel; }
-	
+
 	inline CUresult launch(dim3 grid, dim3 block,
 	                       unsigned int smem, CUstream stream,
 	                       std::vector<void*> arg_ptrs) {
@@ -902,6 +934,42 @@ class CUDAKernel {
 		                      smem, stream,
 		                      &arg_ptrs[0], NULL);
 	}
+#if CUDA_VERSION >= 9000
+        inline CUresult coop_max_grid_size(int &max_blocks, dim3 block, size_t smem) {
+
+	  int  SMs = get_cur_dev_SMs();
+	  int  blocksPerSm;
+
+	  CUresult occ_result = cuOccupancyMaxActiveBlocksPerMultiprocessor(&blocksPerSm, _kernel, block.x*block.y*block.z, 0);
+	  max_blocks = blocksPerSm * SMs;
+	  return occ_result;
+	}
+
+        inline CUresult coop_launch(dim3 grid, dim3 block,
+				    unsigned int smem, CUstream stream,
+				    std::vector<void*> arg_ptrs) {
+
+	  int can_coop;
+          int dev;
+	  cudaGetDevice(&dev);
+	  cuDeviceGetAttribute(&can_coop, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, dev);
+	  if(!can_coop)
+	    throw std::runtime_error(std::string("Error: Cannot use cooperative groups on this device.\n"));
+
+	  int SMs = get_cur_dev_SMs();
+	  int blocksPerSm;
+	  CUresult occ_result = cuOccupancyMaxActiveBlocksPerMultiprocessor(&blocksPerSm, _kernel, block.x*block.y*block.z, 0);
+          if (occ_result != CUDA_SUCCESS) return occ_result;
+
+	  if(blocksPerSm*SMs < (int) grid.x*grid.y*grid.z)
+	    throw std::runtime_error(std::string("Error: Kernel cannot be launched with given grid size. Maximum number of blocks for this kernel is ") +
+				     std::to_string(blocksPerSm*SMs));
+
+	  return cuLaunchCooperativeKernel( _kernel, grid.x, grid.y, grid.z,
+					    block.x, block.y, block.z,
+					    smem, stream, &arg_ptrs[0]);
+       }
+#endif
 };
 
 const char* jitsafe_header_preinclude_h = R"(
@@ -1544,7 +1612,7 @@ inline void split_compiler_and_linker_options(
 	std::vector<std::string>* compiler_options,
 	std::vector<std::string>* linker_files,
 	std::vector<std::string>* linker_paths) {
-	
+
 	for( int i=0; i<(int)options.size(); ++i ) {
 		std::string opt   = options[i];
 		std::string flag  = opt.substr(0, 2);
@@ -1584,14 +1652,14 @@ nvrtcResult compile_kernel(std::string program_name,
 		header_names_c.push_back(name.c_str());
                 header_sources_c.push_back(code.c_str());
 	}
-	
+
 	std::vector<const char*> options_c(options.size()+2);
 	options_c[0] = "--device-as-default-execution-space";
 	options_c[1] = "--pre-include=jitify_preinclude.h";
 	for( int i=0; i<(int)options.size(); ++i ) {
 	  options_c[i+2] = options[i].c_str();
 	}
-	
+
 #if CUDA_VERSION < 8000
 	std::string inst_dummy;
 	if( !instantiation.empty() ) {
@@ -1601,14 +1669,14 @@ nvrtcResult compile_kernel(std::string program_name,
 		program_source += "\nvoid* "+ inst_dummy + " = (void*)" + instantiation + ";\n";
 	}
 #endif
-	
+
 #define CHECK_NVRTC(call) do { \
 		nvrtcResult ret = call; \
 		if( ret != NVRTC_SUCCESS ) { \
 			return ret; \
 		} \
 	} while(0)
-	
+
 	nvrtcProgram nvrtc_program;
 	CHECK_NVRTC( nvrtcCreateProgram(&nvrtc_program,
 	                                program_source.c_str(),
@@ -1616,14 +1684,14 @@ nvrtcResult compile_kernel(std::string program_name,
 	                                num_headers,
 	                                &header_sources_c[0],
 	                                &header_names_c[0]) );
-	
+
 #if CUDA_VERSION >= 8000
 	if( !instantiation.empty() ) {
 		CHECK_NVRTC( nvrtcAddNameExpression(nvrtc_program,
 		                                    instantiation.c_str()) );
 	}
 #endif
-	
+
 	nvrtcResult ret = nvrtcCompileProgram(nvrtc_program,
 	                                      options_c.size(), &options_c[0]);
 	if( log ) {
@@ -1636,7 +1704,7 @@ nvrtcResult compile_kernel(std::string program_name,
 			return ret;
 		}
 	}
-	
+
 	if( ptx ) {
 		size_t ptxsize;
 		CHECK_NVRTC( nvrtcGetPTXSize(nvrtc_program, &ptxsize) );
@@ -1644,7 +1712,7 @@ nvrtcResult compile_kernel(std::string program_name,
 		CHECK_NVRTC( nvrtcGetPTX(nvrtc_program, &vptx[0]) );
 		ptx->assign(&vptx[0], ptxsize);
 	}
-	
+
 	if( !instantiation.empty() && mangled_instantiation ) {
 #if CUDA_VERSION >= 8000
 		const char* mangled_instantiation_cstr;
@@ -1662,7 +1730,7 @@ nvrtcResult compile_kernel(std::string program_name,
 		*mangled_instantiation = ptx->substr(mi_beg, mi_end-mi_beg);
 #endif
 	}
-	
+
 	CHECK_NVRTC( nvrtcDestroyProgram(&nvrtc_program) );
 #undef CHECK_NVRTC
 	return NVRTC_SUCCESS;
@@ -1697,9 +1765,9 @@ class JitCache_impl {
 	inline JitCache_impl(size_t cache_size)
 		: _kernel_cache(cache_size),
 		  _program_config_cache(cache_size) {
-		
+
 		detail::add_options_from_env(_options);
-		
+
 		// Bootstrap the cuda context to avoid errors
 		cudaFree(0);
 	}
@@ -1777,6 +1845,9 @@ class KernelInstantiation_impl {
 	inline KernelInstantiation_impl(KernelInstantiation_impl const&) = default;
 	inline KernelInstantiation_impl(KernelInstantiation_impl&&) = default;
 #endif
+#if CUDA_VERSION >= 9000
+        inline CUresult coop_max_grid_size(int &max_block_size, dim3 block, size_t smem) const;
+#endif
 };
 
 class KernelLauncher_impl {
@@ -1798,6 +1869,10 @@ class KernelLauncher_impl {
 #endif
 	inline CUresult launch(jitify::detail::vector<void*> arg_ptrs,
 	                       jitify::detail::vector<std::string> arg_types=0) const;
+#if CUDA_VERSION >= 9000
+  inline CUresult coop_launch(jitify::detail::vector<void*> arg_ptrs,
+	                       jitify::detail::vector<std::string> arg_types=0) const;
+#endif
 };
 
 /*! An object representing a configured and instantiated kernel ready
@@ -1825,6 +1900,12 @@ class KernelLauncher {
 	                       jitify::detail::vector<std::string> arg_types=0) const {
 		return _impl->launch(arg_ptrs, arg_types);
 	}
+#if CUDA_VERSION >= 9000
+        inline CUresult coop_launch(std::vector<void*> arg_ptrs=std::vector<void*>(),
+	                       jitify::detail::vector<std::string> arg_types=0) const {
+		return _impl->coop_launch(arg_ptrs, arg_types);
+	}
+#endif
 #if __cplusplus >= 201103L
 	// Regular function call syntax
 	/*! Launch the kernel.
@@ -1844,6 +1925,13 @@ class KernelLauncher {
 		return this->launch(std::vector<void*>({(void*)&args...}),
 		                    {reflection::reflect<ArgTypes>()...});
 	}
+#if CUDA_VERSION >= 9000
+	template<typename... ArgTypes>
+	inline CUresult coop_launch(ArgTypes... args) const {
+		return this->coop_launch(std::vector<void*>({(void*)&args...}),
+		                    {reflection::reflect<ArgTypes>()...});
+	}
+#endif //cuda_version
 #endif
 };
 
@@ -1876,6 +1964,11 @@ class KernelInstantiation {
 	                                size_t smem=0, cudaStream_t stream=0) const {
 		return KernelLauncher(*this, grid, block, smem, stream);
 	}
+#if CUDA_VERSION >= 9000
+        inline CUresult coop_max_grid_size(int &max_block_size, dim3 block, size_t smem) const{
+	  return _impl->coop_max_grid_size(max_block_size, block, smem);
+	}
+#endif
 };
 
 /*! An object representing a kernel made up of a Program, a name and options.
@@ -1985,7 +2078,7 @@ class JitCache {
 	enum { DEFAULT_CACHE_SIZE = 128 };
 	JitCache(size_t cache_size=DEFAULT_CACHE_SIZE)
 		: _impl(new JitCache_impl(cache_size)) {}
-	
+
 	/*! Create a program.
 	 *
 	 *  \param source A string containing either the source filename or
@@ -2069,7 +2162,7 @@ std::ostream& operator<<(std::ostream& stream, dim3 d) {
 
 inline CUresult KernelLauncher_impl::launch(jitify::detail::vector<void*> arg_ptrs,
                                             jitify::detail::vector<std::string> arg_types) const {
-	
+
 #if JITIFY_PRINT_LAUNCH
 	Kernel_impl const& kernel = _kernel_inst._kernel;
 	std::string arg_types_string = (arg_types.empty() ? "..." :
@@ -2085,6 +2178,30 @@ inline CUresult KernelLauncher_impl::launch(jitify::detail::vector<void*> arg_pt
 	return _kernel_inst._cuda_kernel->launch(_grid, _block, _smem, _stream,
 	                                          arg_ptrs);
 }
+#if CUDA_VERSION >= 9000
+inline CUresult KernelLauncher_impl::coop_launch(jitify::detail::vector<void*> arg_ptrs,
+                                            jitify::detail::vector<std::string> arg_types) const {
+#if JITIFY_PRINT_LAUNCH
+	Kernel_impl const& kernel = _kernel_inst._kernel;
+	std::string arg_types_string = (arg_types.empty() ? "..." :
+	                                reflection::reflect_list(arg_types));
+	std::cout << "Launching "
+	          << kernel._name
+	          << _kernel_inst._template_inst
+	          << "<<<" << _grid << "," << _block
+	          << "," << _smem << "," << _stream << ">>>"
+	          << "(" << arg_types_string << ")"
+	          << std::endl;
+#endif
+	return _kernel_inst._cuda_kernel->coop_launch(_grid, _block, _smem, _stream,
+	                                          arg_ptrs);
+}
+
+inline CUresult KernelInstantiation_impl::coop_max_grid_size(int &max_blocks, dim3 block, size_t smem) const{
+  return _cuda_kernel->coop_max_grid_size(max_blocks, block, smem);
+}
+#endif //cuda_version
+
 
 KernelInstantiation_impl::KernelInstantiation_impl(Kernel_impl const& kernel,
                                                    std::vector<std::string> const& template_args)
@@ -2123,15 +2240,15 @@ void KernelInstantiation_impl::print() const {
 
 void KernelInstantiation_impl::build_kernel() {
 	Program_impl const& program = _kernel._program;
-	
+
 	std::string instantiation = _kernel._name + _template_inst;
-	
+
 	std::vector<std::string> compiler_options;
 	std::vector<std::string> linker_files;
 	std::vector<std::string> linker_paths;
 	detail::split_compiler_and_linker_options(
 		_options, &compiler_options, &linker_files, &linker_paths);
-	
+
 	std::string log;
 	std::string ptx;
 	std::string mangled_instantiation;
@@ -2148,7 +2265,7 @@ void KernelInstantiation_impl::build_kernel() {
 		throw std::runtime_error(std::string("NVRTC error: ") +
 		                         nvrtcGetErrorString(ret));
 	}
-	
+
 #if JITIFY_PRINT_PTX
 	std::cout << "---------------------------------------" << std::endl;
 	std::cout << mangled_instantiation << std::endl;
@@ -2158,7 +2275,7 @@ void KernelInstantiation_impl::build_kernel() {
 	std::cout << ptx << std::endl;
 	std::cout << "---------------------------------------" << std::endl;
 #endif
-	
+
 	_cuda_kernel->set(mangled_instantiation.c_str(), ptx.c_str(),
 	                  linker_files, linker_paths);
 }
@@ -2222,7 +2339,7 @@ void Program_impl::load_sources(std::string              source,
 	std::vector<std::string>&  include_paths = _config->include_paths;
 	std::string&               name          = _config->name;
 	ProgramConfig::source_map& sources       = _config->sources;
-	
+
 	// Extract include paths from compile options
 	std::vector<std::string>::iterator iter = options.begin();
 	while( iter != options.end() ) {
@@ -2236,7 +2353,7 @@ void Program_impl::load_sources(std::string              source,
 		}
 	}
 	_config->options = options;
-	
+
 	// Load program source
 	if( !detail::load_source(source, sources,
 	                          "", include_paths,
@@ -2244,7 +2361,7 @@ void Program_impl::load_sources(std::string              source,
 		throw std::runtime_error("Source not found: " + source);
 	}
 	name = sources.begin()->first;
-	
+
 	// Load header sources
 	for( int i=0; i<(int)headers.size(); ++i ) {
 		if( !detail::load_source(headers[i],
@@ -2255,7 +2372,7 @@ void Program_impl::load_sources(std::string              source,
 			throw std::runtime_error("Source not found: " + headers[i]);
 		}
 	}
-	
+
 #if JITIFY_PRINT_SOURCE
 	std::string& program_source = sources[name];
 	std::cout << "---------------------------------------" << std::endl;
@@ -2264,13 +2381,13 @@ void Program_impl::load_sources(std::string              source,
 	detail::print_with_line_numbers(program_source);
 	std::cout << "---------------------------------------" << std::endl;
 #endif
-	
+
 	std::vector<std::string> compiler_options;
 	std::vector<std::string> linker_files;
 	std::vector<std::string> linker_paths;
 	detail::split_compiler_and_linker_options(
 		options, &compiler_options, &linker_files, &linker_paths);
-	
+
 	std::string log;
 	nvrtcResult ret;
 	while( (ret = detail::compile_kernel(name, sources, compiler_options, "", &log))
@@ -2289,7 +2406,7 @@ void Program_impl::load_sources(std::string              source,
 			// TODO: How to handle error?
 			throw std::runtime_error("Runtime compilation failed");
 		}
-		
+
 		// Try to load the new header
 		std::string include_path = detail::path_base(include_parent);
 		if( !detail::load_source(include_name, sources,
@@ -2297,13 +2414,13 @@ void Program_impl::load_sources(std::string              source,
 		                         file_callback) ) {
 			// Comment-out the include line and print a warning
 			if( !sources.count(include_parent) ) {
-				
+
 				// ***TODO: Unless there's another mechanism (e.g., potentially
 				//            the parent path vs. filename problem), getting
 				//            here means include_parent was found automatically
 				//            in a system include path.
 				//            We need a WAR to zap it from *its parent*.
-				
+
 				for( ProgramConfig::source_map::const_iterator it=sources.begin();
 				     it != sources.end(); ++it ) {
 					std::cout << "  " << it->first << std::endl;
@@ -2482,7 +2599,7 @@ CUresult parallel_for(ExecutionPolicy     policy,
                       IndexType           end,
                       Lambda<Func> const& lambda) {
 	using namespace jitify;
-	
+
 	if( policy.location == HOST ) {
 #ifdef _OPENMP
 #pragma omp parallel for
@@ -2500,7 +2617,7 @@ CUresult parallel_for(ExecutionPolicy     policy,
 	arg_decls.insert(arg_decls.end(),
 	                 lambda._capture._arg_decls.begin(),
 	                 lambda._capture._arg_decls.end());
-	
+
 	std::stringstream source_ss;
 	source_ss << "parallel_for_program\n";
 	for( auto const& header : policy.headers ) {
@@ -2517,7 +2634,7 @@ CUresult parallel_for(ExecutionPolicy     policy,
 		"	" << "\t" << lambda._func_string << ";\n" <<
 		"	}\n"
 		"}\n";
-	
+
 	Program program = kernel_cache.program(source_ss.str(),
 	                                       policy.headers,
 	                                       policy.options,
@@ -2529,7 +2646,7 @@ CUresult parallel_for(ExecutionPolicy     policy,
 	arg_ptrs.insert(arg_ptrs.end(),
 	                lambda._capture._arg_ptrs.begin(),
 	                lambda._capture._arg_ptrs.end());
-	
+
 	size_t n = end - begin;
 	dim3 block(policy.block_size);
 	dim3 grid(std::min((n-1) / block.x + 1, size_t(65535)));
diff --git a/jitify_example.cpp b/jitify_example.cpp
index 1b4f513..65ccf47 100644
--- a/jitify_example.cpp
+++ b/jitify_example.cpp
@@ -129,12 +129,12 @@ bool test_kernels() {
 		"    }\n"
 		"}\n"
 		;
-	
+
 	using jitify::reflection::reflect;
 	using jitify::reflection::NonType;
 	using jitify::reflection::Type;
 	using jitify::reflection::type_of;
-	
+
 	thread_local static jitify::JitCache kernel_cache;
 	jitify::Program program = kernel_cache.program
 		(program1, // Code string specified above
@@ -142,14 +142,14 @@ bool test_kernels() {
 		 {"--use_fast_math",
 		  "-I/usr/local/cuda/include"},
 		 file_callback);
-	
+
 	T* indata;
 	T* outdata;
 	cudaMalloc((void**)&indata,  sizeof(T));
 	cudaMalloc((void**)&outdata, sizeof(T));
 	T inval = 3.14159f;
 	cudaMemcpy(indata, &inval, sizeof(T), cudaMemcpyHostToDevice);
-	
+
 	dim3 grid(1);
 	dim3 block(1);
 	CHECK_CUDA(program
@@ -179,14 +179,14 @@ bool test_kernels() {
 	            .instantiate((int)C, type_of(*indata))
 	            .configure(grid,block)
 	            .launch(indata, outdata) );
-	
+
 	T outval = 0;
 	cudaMemcpy(&outval, outdata, sizeof(T), cudaMemcpyDeviceToHost);
 	cudaFree(outdata);
 	cudaFree(indata);
-	
+
 	std::cout << inval << " -> " << outval << std::endl;
-	
+
 	return are_close(inval, outval);
 }
 
@@ -196,16 +196,16 @@ bool test_parallel_for() {
 	T* d_out;
 	cudaMalloc((void**)&d_out, n*sizeof(T));
 	T val = 3.14159f;
-	
+
 	jitify::ExecutionPolicy policy(jitify::DEVICE);
 	auto lambda = JITIFY_LAMBDA( (d_out, val) , d_out[i] = i*val );
 	CHECK_CUDA(jitify::parallel_for(policy, 0, n, lambda) );
-	
+
 	std::vector<T> h_out(n);
 	cudaMemcpy(&h_out[0], d_out, n*sizeof(T), cudaMemcpyDeviceToHost);
-	
+
 	cudaFree(d_out);
-	
+
 	for( int i=0; i<n; ++i ) {
 		if( !are_close(h_out[i], i*val) ) {
 			std::cout << h_out[i] << " != " << i*val << std::endl;
@@ -220,7 +220,7 @@ bool test_parallel_for() {
 int main(int argc, char *argv[]) {
 #if __cplusplus >= 201103L
 #define TEST_RESULT(result) (result ? "PASSED" : "FAILED")
-	
+
 	// Uncached
 	bool test_simple_result       = test_simple<float>();
 	bool test_kernels_result      = test_kernels<float>();
@@ -229,11 +229,11 @@ int main(int argc, char *argv[]) {
 	test_simple_result       &= test_simple<float>();
 	test_kernels_result      &= test_kernels<float>();
 	test_parallel_for_result &= test_parallel_for<float>();
-	
+
 	std::cout << "test_simple<float>:       " << TEST_RESULT(test_simple_result) << std::endl;
 	std::cout << "test_kernels<float>:      " << TEST_RESULT(test_kernels_result) << std::endl;
 	std::cout << "test_parallel_for<float>: " << TEST_RESULT(test_parallel_for_result) << std::endl;
-	
+
 	return (!test_simple_result +
 	        !test_kernels_result +
 	        !test_parallel_for_result);
diff --git a/stringify.cpp b/stringify.cpp
index 9647fbe..75c129a 100644
--- a/stringify.cpp
+++ b/stringify.cpp
@@ -81,4 +81,3 @@ int main(int argc, char *argv[]) {
 	ostream << ";" << std::endl;
 	return 0;
 }
-