Initial commit
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Build Kernel / Build all affected Kernels (push) Has been cancelled
				
			
		
			
				
	
				Build all core packages / Build all core packages for selected target (push) Has been cancelled
				
			
		
			
				
	
				Build and Push prebuilt tools container / Build and Push all prebuilt containers (push) Has been cancelled
				
			
		
			
				
	
				Build Toolchains / Build Toolchains for each target (push) Has been cancelled
				
			
		
			
				
	
				Build host tools / Build host tools for linux and macos based systems (push) Has been cancelled
				
			
		
			
				
	
				Coverity scan build / Coverity x86/64 build (push) Has been cancelled
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Build Kernel / Build all affected Kernels (push) Has been cancelled
				
			Build all core packages / Build all core packages for selected target (push) Has been cancelled
				
			Build and Push prebuilt tools container / Build and Push all prebuilt containers (push) Has been cancelled
				
			Build Toolchains / Build Toolchains for each target (push) Has been cancelled
				
			Build host tools / Build host tools for linux and macos based systems (push) Has been cancelled
				
			Coverity scan build / Coverity x86/64 build (push) Has been cancelled
				
			This commit is contained in:
		
							
								
								
									
										11
									
								
								package/libs/zlib/Config.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								package/libs/zlib/Config.in
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| menu "Configuration" | ||||
| 	depends on PACKAGE_zlib | ||||
|  | ||||
| config ZLIB_OPTIMIZE_SPEED | ||||
| 	bool "Optimize for speed" | ||||
| 	help | ||||
| 		This enables additional optimization and  | ||||
| 		increases performance considerably at  | ||||
| 		the expense of binary size. | ||||
|  | ||||
| endmenu | ||||
							
								
								
									
										98
									
								
								package/libs/zlib/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								package/libs/zlib/Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # | ||||
| # Copyright (C) 2006-2013 OpenWrt.org | ||||
| # | ||||
| # This is free software, licensed under the GNU General Public License v2. | ||||
| # See /LICENSE for more information. | ||||
| # | ||||
|  | ||||
| include $(TOPDIR)/rules.mk | ||||
|  | ||||
| PKG_NAME:=zlib | ||||
| PKG_VERSION:=1.3.1 | ||||
| PKG_RELEASE:=1 | ||||
|  | ||||
| PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz | ||||
| PKG_SOURCE_URL:=https://github.com/madler/zlib/releases/download/v$(PKG_VERSION) | ||||
| PKG_HASH:=38ef96b8dfe510d42707d9c781877914792541133e1870841463bfa73f883e32 | ||||
|  | ||||
| PKG_LICENSE:=Zlib | ||||
| PKG_LICENSE_FILES:=README | ||||
| PKG_CPE_ID:=cpe:/a:gnu:zlib | ||||
|  | ||||
| PKG_CONFIG_DEPENDS:= CONFIG_ZLIB_OPTIMIZE_SPEED | ||||
|  | ||||
| include $(INCLUDE_DIR)/package.mk | ||||
| include $(INCLUDE_DIR)/cmake.mk | ||||
|  | ||||
| define Package/zlib | ||||
|   SECTION:=libs | ||||
|   CATEGORY:=Libraries | ||||
|   TITLE:=Library implementing the deflate compression method | ||||
|   URL:=http://www.zlib.net/ | ||||
| endef | ||||
|  | ||||
| define Package/zlib-dev | ||||
|   SECTION:=devel | ||||
|   CATEGORY:=Development | ||||
|   SUBMENU:=Libraries | ||||
|   DEPENDS:=zlib | ||||
|   TITLE:=Development files for the zlib library | ||||
| endef | ||||
|  | ||||
| define Package/zlib/description | ||||
|  zlib is a lossless data-compression library. | ||||
|  This package includes the shared library. | ||||
| endef | ||||
|  | ||||
| define Package/zlib-dev/description | ||||
|  zlib is a lossless data-compression library. | ||||
|  This package includes the development support files. | ||||
| endef | ||||
|  | ||||
| define Package/zlib/config | ||||
| 	source "$(SOURCE)/Config.in" | ||||
| endef | ||||
|  | ||||
| TARGET_CFLAGS += $(FPIC) | ||||
|  | ||||
| ifeq ($(CONFIG_ZLIB_OPTIMIZE_SPEED),y) | ||||
| 	TARGET_CFLAGS := $(filter-out -O%,$(TARGET_CFLAGS)) -O3 | ||||
| endif | ||||
|  | ||||
| CMAKE_OPTIONS += \ | ||||
| 	-DARMv8=$$$$(echo -e '\#ifdef __ARM_NEON__\nON\n\#else\nOFF\n\#endif' | $$(TARGET_CC) $$(TARGET_CFLAGS) -x c -E - | grep -xE 'ON|OFF') | ||||
|  | ||||
| define Build/InstallDev | ||||
| 	mkdir -p $(1)/usr/include | ||||
| 	$(CP)	$(PKG_INSTALL_DIR)/usr/include/z{conf,lib}.h \ | ||||
| 		$(1)/usr/include/ | ||||
| 	mkdir -p $(1)/usr/lib | ||||
| 	$(CP)	$(PKG_INSTALL_DIR)/usr/lib/libz.{a,so*} \ | ||||
| 		$(1)/usr/lib/ | ||||
| 	mkdir -p $(1)/usr/lib/pkgconfig | ||||
| 	$(CP)	$(PKG_INSTALL_DIR)/usr/share/pkgconfig/zlib.pc \ | ||||
| 		$(1)/usr/lib/pkgconfig/ | ||||
| endef | ||||
|  | ||||
| # libz.so is needed for openssl (zlib-dynamic) | ||||
| define Package/zlib/install | ||||
| 	$(INSTALL_DIR) $(1)/usr/lib | ||||
| 	$(CP) $(PKG_INSTALL_DIR)/usr/lib/libz.so $(1)/usr/lib/ | ||||
| 	$(CP) $(PKG_INSTALL_DIR)/usr/lib/libz.so.* $(1)/usr/lib/ | ||||
| endef | ||||
|  | ||||
| define Package/zlib-dev/install | ||||
| 	$(INSTALL_DIR) $(1)/usr/include | ||||
| 	$(INSTALL_DATA) $(PKG_INSTALL_DIR)/usr/include/zconf.h \ | ||||
| 	  $(1)/usr/include/ | ||||
| 	$(INSTALL_DATA) $(PKG_INSTALL_DIR)/usr/include/zlib.h \ | ||||
| 	  $(1)/usr/include/ | ||||
| 	$(INSTALL_DIR) $(1)/usr/lib | ||||
| 	$(CP) $(PKG_INSTALL_DIR)/usr/lib/libz.a $(1)/usr/lib/ | ||||
| 	$(INSTALL_DIR) $(1)/usr/lib/pkgconfig | ||||
| 	$(INSTALL_DATA) $(PKG_INSTALL_DIR)/usr/share/pkgconfig/zlib.pc \ | ||||
| 	  $(1)/usr/lib/pkgconfig/ | ||||
| endef | ||||
|  | ||||
| $(eval $(call BuildPackage,zlib)) | ||||
| $(eval $(call BuildPackage,zlib-dev)) | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -0,0 +1,496 @@ | ||||
| From 247147654fe5cd11cf15d8dff91440405ea57040 Mon Sep 17 00:00:00 2001 | ||||
| From: Simon Hosie <simon.hosie@arm.com> | ||||
| Date: Wed, 12 Apr 2017 15:44:21 -0700 | ||||
| Subject: [PATCH 2/2] Inflate using wider loads and stores | ||||
|  | ||||
| In inflate_fast() the output pointer always has plenty of room to write. This | ||||
| means that so long as the target is capable, wide un-aligned loads and stores | ||||
| can be used to transfer several bytes at once. When the reference distance is | ||||
| too short simply unroll the data a little to increase the distance. | ||||
|  | ||||
| Change-Id: I59854eb25d2b1e43561c8a2afaf9175bf10cf674 | ||||
| --- | ||||
|  contrib/arm/chunkcopy.h | 279 ++++++++++++++++++++++++++++++++++++++++++++++++ | ||||
|  contrib/arm/inffast.c   |  96 +++++++---------- | ||||
|  contrib/arm/inflate.c   |  22 ++-- | ||||
|  3 files changed, 335 insertions(+), 62 deletions(-) | ||||
|  create mode 100644 contrib/arm/chunkcopy.h | ||||
|  | ||||
| --- /dev/null | ||||
| +++ b/contrib/arm/chunkcopy.h | ||||
| @@ -0,0 +1,279 @@ | ||||
| +/* chunkcopy.h -- fast copies and sets | ||||
| + * Copyright (C) 2017 ARM, Inc. | ||||
| + * For conditions of distribution and use, see copyright notice in zlib.h | ||||
| + */ | ||||
| + | ||||
| +#ifndef CHUNKCOPY_H | ||||
| +#define CHUNKCOPY_H | ||||
| + | ||||
| +#include "zutil.h" | ||||
| +#include <arm_neon.h> | ||||
| + | ||||
| +#if __STDC_VERSION__ >= 199901L | ||||
| +#define Z_RESTRICT restrict | ||||
| +#else | ||||
| +#define Z_RESTRICT | ||||
| +#endif | ||||
| + | ||||
| +typedef uint8x16_t chunkcopy_chunk_t; | ||||
| +#define CHUNKCOPY_CHUNK_SIZE sizeof(chunkcopy_chunk_t) | ||||
| + | ||||
| +/* | ||||
| +   Ask the compiler to perform a wide, unaligned load with an machine | ||||
| +   instruction appropriate for the chunkcopy_chunk_t type. | ||||
| + */ | ||||
| +static inline chunkcopy_chunk_t loadchunk(const unsigned char FAR *s) { | ||||
| +    chunkcopy_chunk_t c; | ||||
| +    __builtin_memcpy(&c, s, sizeof(c)); | ||||
| +    return c; | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Ask the compiler to perform a wide, unaligned store with an machine | ||||
| +   instruction appropriate for the chunkcopy_chunk_t type. | ||||
| + */ | ||||
| +static inline void storechunk(unsigned char FAR *d, chunkcopy_chunk_t c) { | ||||
| +    __builtin_memcpy(d, &c, sizeof(c)); | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Perform a memcpy-like operation, but assume that length is non-zero and that | ||||
| +   it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if | ||||
| +   the length is shorter than this. | ||||
| + | ||||
| +   It also guarantees that it will properly unroll the data if the distance | ||||
| +   between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on | ||||
| +   in chunkcopy_relaxed(). | ||||
| + | ||||
| +   Aside from better memory bus utilisation, this means that short copies | ||||
| +   (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop | ||||
| +   without iteration, which will hopefully make the branch prediction more | ||||
| +   reliable. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkcopy_core(unsigned char FAR *out, | ||||
| +                                                const unsigned char FAR *from, | ||||
| +                                                unsigned len) { | ||||
| +    int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; | ||||
| +    storechunk(out, loadchunk(from)); | ||||
| +    out += bump; | ||||
| +    from += bump; | ||||
| +    len /= CHUNKCOPY_CHUNK_SIZE; | ||||
| +    while (len-- > 0) { | ||||
| +        storechunk(out, loadchunk(from)); | ||||
| +        out += CHUNKCOPY_CHUNK_SIZE; | ||||
| +        from += CHUNKCOPY_CHUNK_SIZE; | ||||
| +    } | ||||
| +    return out; | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Like chunkcopy_core, but avoid writing beyond of legal output. | ||||
| + | ||||
| +   Accepts an additional pointer to the end of safe output.  A generic safe | ||||
| +   copy would use (out + len), but it's normally the case that the end of the | ||||
| +   output buffer is beyond the end of the current copy, and this can still be | ||||
| +   exploited. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkcopy_core_safe(unsigned char FAR *out, | ||||
| +                                                     const unsigned char FAR * from, | ||||
| +                                                     unsigned len, | ||||
| +                                                     unsigned char FAR *limit) { | ||||
| +    Assert(out + len <= limit, "chunk copy exceeds safety limit"); | ||||
| +    if (limit - out < CHUNKCOPY_CHUNK_SIZE) { | ||||
| +        const unsigned char FAR * Z_RESTRICT rfrom = from; | ||||
| +        if (len & 8) { __builtin_memcpy(out, rfrom, 8); out += 8; rfrom += 8; } | ||||
| +        if (len & 4) { __builtin_memcpy(out, rfrom, 4); out += 4; rfrom += 4; } | ||||
| +        if (len & 2) { __builtin_memcpy(out, rfrom, 2); out += 2; rfrom += 2; } | ||||
| +        if (len & 1) { *out++ = *rfrom++; } | ||||
| +        return out; | ||||
| +    } | ||||
| +    return chunkcopy_core(out, from, len); | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Perform short copies until distance can be rewritten as being at least | ||||
| +   CHUNKCOPY_CHUNK_SIZE. | ||||
| + | ||||
| +   This assumes that it's OK to overwrite at least the first | ||||
| +   2*CHUNKCOPY_CHUNK_SIZE bytes of output even if the copy is shorter than | ||||
| +   this.  This assumption holds within inflate_fast() which starts every | ||||
| +   iteration with at least 258 bytes of output space available (258 being the | ||||
| +   maximum length output from a single token; see inffast.c). | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkunroll_relaxed(unsigned char FAR *out, | ||||
| +                                                     unsigned FAR *dist, | ||||
| +                                                     unsigned FAR *len) { | ||||
| +    const unsigned char FAR *from = out - *dist; | ||||
| +    while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) { | ||||
| +        storechunk(out, loadchunk(from)); | ||||
| +        out += *dist; | ||||
| +        *len -= *dist; | ||||
| +        *dist += *dist; | ||||
| +    } | ||||
| +    return out; | ||||
| +} | ||||
| + | ||||
| + | ||||
| +static inline uint8x16_t chunkset_vld1q_dup_u8x8(const unsigned char FAR * Z_RESTRICT from) { | ||||
| +#if defined(__clang__) || defined(__aarch64__) | ||||
| +    return vreinterpretq_u8_u64(vld1q_dup_u64((void *)from)); | ||||
| +#else | ||||
| +    /* 32-bit GCC uses an alignment hint for vld1q_dup_u64, even when given a | ||||
| +     * void pointer, so here's an alternate implementation. | ||||
| +     */ | ||||
| +    uint8x8_t h = vld1_u8(from); | ||||
| +    return vcombine_u8(h, h); | ||||
| +#endif | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Perform an overlapping copy which behaves as a memset() operation, but | ||||
| +   supporting periods other than one, and assume that length is non-zero and | ||||
| +   that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output | ||||
| +   even if the length is shorter than this. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkset_core(unsigned char FAR *out, | ||||
| +                                               unsigned period, | ||||
| +                                               unsigned len) { | ||||
| +    uint8x16_t f; | ||||
| +    int bump = ((len - 1) % sizeof(f)) + 1; | ||||
| + | ||||
| +    switch (period) { | ||||
| +    case 1: | ||||
| +        f = vld1q_dup_u8(out - 1); | ||||
| +        vst1q_u8(out, f); | ||||
| +        out += bump; | ||||
| +        len -= bump; | ||||
| +        while (len > 0) { | ||||
| +            vst1q_u8(out, f); | ||||
| +            out += sizeof(f); | ||||
| +            len -= sizeof(f); | ||||
| +        } | ||||
| +        return out; | ||||
| +    case 2: | ||||
| +        f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2))); | ||||
| +        vst1q_u8(out, f); | ||||
| +        out += bump; | ||||
| +        len -= bump; | ||||
| +        if (len > 0) { | ||||
| +            f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2))); | ||||
| +            do { | ||||
| +                vst1q_u8(out, f); | ||||
| +                out += sizeof(f); | ||||
| +                len -= sizeof(f); | ||||
| +            } while (len > 0); | ||||
| +        } | ||||
| +        return out; | ||||
| +    case 4: | ||||
| +        f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4))); | ||||
| +        vst1q_u8(out, f); | ||||
| +        out += bump; | ||||
| +        len -= bump; | ||||
| +        if (len > 0) { | ||||
| +            f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4))); | ||||
| +            do { | ||||
| +                vst1q_u8(out, f); | ||||
| +                out += sizeof(f); | ||||
| +                len -= sizeof(f); | ||||
| +            } while (len > 0); | ||||
| +        } | ||||
| +        return out; | ||||
| +    case 8: | ||||
| +        f = chunkset_vld1q_dup_u8x8(out - 8); | ||||
| +        vst1q_u8(out, f); | ||||
| +        out += bump; | ||||
| +        len -= bump; | ||||
| +        if (len > 0) { | ||||
| +            f = chunkset_vld1q_dup_u8x8(out - 8); | ||||
| +            do { | ||||
| +                vst1q_u8(out, f); | ||||
| +                out += sizeof(f); | ||||
| +                len -= sizeof(f); | ||||
| +            } while (len > 0); | ||||
| +        } | ||||
| +        return out; | ||||
| +    } | ||||
| +    out = chunkunroll_relaxed(out, &period, &len); | ||||
| +    return chunkcopy_core(out, out - period, len); | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Perform a memcpy-like operation, but assume that length is non-zero and that | ||||
| +   it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if | ||||
| +   the length is shorter than this. | ||||
| + | ||||
| +   Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour | ||||
| +   of overlapping buffers, regardless of the distance between the pointers. | ||||
| +   This is reflected in the `restrict`-qualified pointers, allowing the | ||||
| +   compiler to reorder loads and stores. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkcopy_relaxed(unsigned char FAR * Z_RESTRICT out, | ||||
| +                                                   const unsigned char FAR * Z_RESTRICT from, | ||||
| +                                                   unsigned len) { | ||||
| +    return chunkcopy_core(out, from, len); | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Like chunkcopy_relaxed, but avoid writing beyond of legal output. | ||||
| + | ||||
| +   Unlike chunkcopy_core_safe() above, no guarantee is made regarding the | ||||
| +   behaviour of overlapping buffers, regardless of the distance between the | ||||
| +   pointers.  This is reflected in the `restrict`-qualified pointers, allowing | ||||
| +   the compiler to reorder loads and stores. | ||||
| + | ||||
| +   Accepts an additional pointer to the end of safe output.  A generic safe | ||||
| +   copy would use (out + len), but it's normally the case that the end of the | ||||
| +   output buffer is beyond the end of the current copy, and this can still be | ||||
| +   exploited. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkcopy_safe(unsigned char FAR *out, | ||||
| +                                                const unsigned char FAR * Z_RESTRICT from, | ||||
| +                                                unsigned len, | ||||
| +                                                unsigned char FAR *limit) { | ||||
| +    Assert(out + len <= limit, "chunk copy exceeds safety limit"); | ||||
| +    return chunkcopy_core_safe(out, from, len, limit); | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Perform chunky copy within the same buffer, where the source and destination | ||||
| +   may potentially overlap. | ||||
| + | ||||
| +   Assumes that len > 0 on entry, and that it's safe to write at least | ||||
| +   CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkcopy_lapped_relaxed(unsigned char FAR *out, | ||||
| +                                                          unsigned dist, | ||||
| +                                                          unsigned len) { | ||||
| +    if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) { | ||||
| +        return chunkset_core(out, dist, len); | ||||
| +    } | ||||
| +    return chunkcopy_core(out, out - dist, len); | ||||
| +} | ||||
| + | ||||
| +/* | ||||
| +   Behave like chunkcopy_lapped_relaxed, but avoid writing beyond of legal output. | ||||
| + | ||||
| +   Accepts an additional pointer to the end of safe output.  A generic safe | ||||
| +   copy would use (out + len), but it's normally the case that the end of the | ||||
| +   output buffer is beyond the end of the current copy, and this can still be | ||||
| +   exploited. | ||||
| + */ | ||||
| +static inline unsigned char FAR *chunkcopy_lapped_safe(unsigned char FAR *out, | ||||
| +                                                       unsigned dist, | ||||
| +                                                       unsigned len, | ||||
| +                                                       unsigned char FAR *limit) { | ||||
| +    Assert(out + len <= limit, "chunk copy exceeds safety limit"); | ||||
| +    if (limit - out < CHUNKCOPY_CHUNK_SIZE * 3) { | ||||
| +        /* TODO: try harder to optimise this */ | ||||
| +        while (len-- > 0) { | ||||
| +            *out = *(out - dist); | ||||
| +            out++; | ||||
| +        } | ||||
| +        return out; | ||||
| +    } | ||||
| +    return chunkcopy_lapped_relaxed(out, dist, len); | ||||
| +} | ||||
| + | ||||
| +#undef Z_RESTRICT | ||||
| + | ||||
| +#endif /* CHUNKCOPY_H */ | ||||
| --- a/contrib/arm/inffast.c | ||||
| +++ b/contrib/arm/inffast.c | ||||
| @@ -7,6 +7,7 @@ | ||||
|  #include "inftrees.h" | ||||
|  #include "inflate.h" | ||||
|  #include "inffast.h" | ||||
| +#include "chunkcopy.h" | ||||
|   | ||||
|  #ifdef ASMINF | ||||
|  #  pragma message("Assembler code may have bugs -- use at your own risk") | ||||
| @@ -57,6 +58,7 @@ unsigned start;         /* inflate()'s s | ||||
|      unsigned char FAR *out;     /* local strm->next_out */ | ||||
|      unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */ | ||||
|      unsigned char FAR *end;     /* while out < end, enough space available */ | ||||
| +    unsigned char FAR *limit;   /* safety limit for chunky copies */ | ||||
|  #ifdef INFLATE_STRICT | ||||
|      unsigned dmax;              /* maximum distance from zlib header */ | ||||
|  #endif | ||||
| @@ -84,12 +86,13 @@ unsigned start;         /* inflate()'s s | ||||
|      out = strm->next_out; | ||||
|      beg = out - (start - strm->avail_out); | ||||
|      end = out + (strm->avail_out - 257); | ||||
| +    limit = out + strm->avail_out; | ||||
|  #ifdef INFLATE_STRICT | ||||
|      dmax = state->dmax; | ||||
|  #endif | ||||
|      wsize = state->wsize; | ||||
|      whave = state->whave; | ||||
| -    wnext = state->wnext; | ||||
| +    wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext; | ||||
|      window = state->window; | ||||
|      hold = state->hold; | ||||
|      bits = state->bits; | ||||
| @@ -197,70 +200,51 @@ unsigned start;         /* inflate()'s s | ||||
|  #endif | ||||
|                      } | ||||
|                      from = window; | ||||
| -                    if (wnext == 0) {           /* very common case */ | ||||
| -                        from += wsize - op; | ||||
| -                        if (op < len) {         /* some from window */ | ||||
| -                            len -= op; | ||||
| -                            do { | ||||
| -                                *out++ = *from++; | ||||
| -                            } while (--op); | ||||
| -                            from = out - dist;  /* rest from output */ | ||||
| -                        } | ||||
| +                    if (wnext >= op) {          /* contiguous in window */ | ||||
| +                        from += wnext - op; | ||||
|                      } | ||||
| -                    else if (wnext < op) {      /* wrap around window */ | ||||
| -                        from += wsize + wnext - op; | ||||
| +                    else {                      /* wrap around window */ | ||||
|                          op -= wnext; | ||||
| +                        from += wsize - op; | ||||
|                          if (op < len) {         /* some from end of window */ | ||||
|                              len -= op; | ||||
| -                            do { | ||||
| -                                *out++ = *from++; | ||||
| -                            } while (--op); | ||||
| -                            from = window; | ||||
| -                            if (wnext < len) {  /* some from start of window */ | ||||
| -                                op = wnext; | ||||
| -                                len -= op; | ||||
| -                                do { | ||||
| -                                    *out++ = *from++; | ||||
| -                                } while (--op); | ||||
| -                                from = out - dist;      /* rest from output */ | ||||
| -                            } | ||||
| -                        } | ||||
| -                    } | ||||
| -                    else {                      /* contiguous in window */ | ||||
| -                        from += wnext - op; | ||||
| -                        if (op < len) {         /* some from window */ | ||||
| -                            len -= op; | ||||
| -                            do { | ||||
| -                                *out++ = *from++; | ||||
| -                            } while (--op); | ||||
| -                            from = out - dist;  /* rest from output */ | ||||
| -                        } | ||||
| -                    } | ||||
| -                    while (len > 2) { | ||||
| -                        *out++ = *from++; | ||||
| -                        *out++ = *from++; | ||||
| -                        *out++ = *from++; | ||||
| -                        len -= 3; | ||||
| -                    } | ||||
| -                    if (len) { | ||||
| -                        *out++ = *from++; | ||||
| -                        if (len > 1) | ||||
| -                            *out++ = *from++; | ||||
| +                            out = chunkcopy_safe(out, from, op, limit); | ||||
| +                            from = window;      /* more from start of window */ | ||||
| +                            op = wnext; | ||||
| +                            /* This (rare) case can create a situation where | ||||
| +                               the first chunkcopy below must be checked. | ||||
| +                             */ | ||||
| +                        } | ||||
| +                    } | ||||
| +                    if (op < len) {             /* still need some from output */ | ||||
| +                        out = chunkcopy_safe(out, from, op, limit); | ||||
| +                        len -= op; | ||||
| +                        /* When dist is small the amount of data that can be | ||||
| +                           copied from the window is also small, and progress | ||||
| +                           towards the dangerous end of the output buffer is | ||||
| +                           also small.  This means that for trivial memsets and | ||||
| +                           for chunkunroll_relaxed() a safety check is | ||||
| +                           unnecessary.  However, these conditions may not be | ||||
| +                           entered at all, and in that case it's possible that | ||||
| +                           the main copy is near the end. | ||||
| +                          */ | ||||
| +                        out = chunkunroll_relaxed(out, &dist, &len); | ||||
| +                        out = chunkcopy_safe(out, out - dist, len, limit); | ||||
| +                    } else { | ||||
| +                        /* from points to window, so there is no risk of | ||||
| +                           overlapping pointers requiring memset-like behaviour | ||||
| +                         */ | ||||
| +                        out = chunkcopy_safe(out, from, len, limit); | ||||
|                      } | ||||
|                  } | ||||
|                  else { | ||||
| -                    from = out - dist;          /* copy direct from output */ | ||||
| -                    do {                        /* minimum length is three */ | ||||
| -                        *out++ = *from++; | ||||
| -                        *out++ = *from++; | ||||
| -                        *out++ = *from++; | ||||
| -                        len -= 3; | ||||
| -                    } while (len > 2); | ||||
| -                    if (len) { | ||||
| -                        *out++ = *from++; | ||||
| -                        if (len > 1) | ||||
| -                            *out++ = *from++; | ||||
| -                    } | ||||
| +                    /* Whole reference is in range of current output.  No | ||||
| +                       range checks are necessary because we start with room | ||||
| +                       for at least 258 bytes of output, so unroll and roundoff | ||||
| +                       operations can write beyond `out+len` so long as they | ||||
| +                       stay within 258 bytes of `out`. | ||||
| +                     */ | ||||
| +                    out = chunkcopy_lapped_relaxed(out, dist, len); | ||||
|                  } | ||||
|              } | ||||
|              else if ((op & 64) == 0) {          /* 2nd level distance code */ | ||||
| --- a/contrib/arm/inflate.c | ||||
| +++ b/contrib/arm/inflate.c | ||||
| @@ -84,6 +84,7 @@ | ||||
|  #include "inftrees.h" | ||||
|  #include "inflate.h" | ||||
|  #include "inffast.h" | ||||
| +#include "contrib/arm/chunkcopy.h" | ||||
|   | ||||
|  #ifdef MAKEFIXED | ||||
|  #  ifndef BUILDFIXED | ||||
| @@ -405,10 +406,20 @@ unsigned copy; | ||||
|   | ||||
|      /* if it hasn't been done already, allocate space for the window */ | ||||
|      if (state->window == Z_NULL) { | ||||
| +        unsigned wsize = 1U << state->wbits; | ||||
|          state->window = (unsigned char FAR *) | ||||
| -                        ZALLOC(strm, 1U << state->wbits, | ||||
| +                        ZALLOC(strm, wsize + CHUNKCOPY_CHUNK_SIZE, | ||||
|                                 sizeof(unsigned char)); | ||||
|          if (state->window == Z_NULL) return 1; | ||||
| +#ifdef INFLATE_CLEAR_UNUSED_UNDEFINED | ||||
| +        /* Copies from the overflow portion of this buffer are undefined and | ||||
| +           may cause analysis tools to raise a warning if we don't initialize | ||||
| +           it.  However, this undefined data overwrites other undefined data | ||||
| +           and is subsequently either overwritten or left deliberately | ||||
| +           undefined at the end of decode; so there's really no point. | ||||
| +         */ | ||||
| +        memset(state->window + wsize, 0, CHUNKCOPY_CHUNK_SIZE); | ||||
| +#endif | ||||
|      } | ||||
|   | ||||
|      /* if window not in use yet, initialize */ | ||||
| @@ -1175,17 +1186,16 @@ int flush; | ||||
|                  else | ||||
|                      from = state->window + (state->wnext - copy); | ||||
|                  if (copy > state->length) copy = state->length; | ||||
| +                if (copy > left) copy = left; | ||||
| +                put = chunkcopy_safe(put, from, copy, put + left); | ||||
|              } | ||||
|              else {                              /* copy from output */ | ||||
| -                from = put - state->offset; | ||||
|                  copy = state->length; | ||||
| +                if (copy > left) copy = left; | ||||
| +                put = chunkcopy_lapped_safe(put, state->offset, copy, put + left); | ||||
|              } | ||||
| -            if (copy > left) copy = left; | ||||
|              left -= copy; | ||||
|              state->length -= copy; | ||||
| -            do { | ||||
| -                *put++ = *from++; | ||||
| -            } while (--copy); | ||||
|              if (state->length == 0) state->mode = LEN; | ||||
|              break; | ||||
|          case LIT: | ||||
| @@ -0,0 +1,98 @@ | ||||
| --- a/CMakeLists.txt | ||||
| +++ b/CMakeLists.txt | ||||
| @@ -93,34 +93,67 @@ set(ZLIB_PUBLIC_HDRS | ||||
|      ${CMAKE_CURRENT_BINARY_DIR}/zconf.h | ||||
|      zlib.h | ||||
|  ) | ||||
| -set(ZLIB_PRIVATE_HDRS | ||||
| -    crc32.h | ||||
| -    deflate.h | ||||
| -    gzguts.h | ||||
| -    inffast.h | ||||
| -    inffixed.h | ||||
| -    inflate.h | ||||
| -    inftrees.h | ||||
| -    trees.h | ||||
| -    zutil.h | ||||
| -) | ||||
| -set(ZLIB_SRCS | ||||
| -    adler32.c | ||||
| -    compress.c | ||||
| -    crc32.c | ||||
| -    deflate.c | ||||
| -    gzclose.c | ||||
| -    gzlib.c | ||||
| -    gzread.c | ||||
| -    gzwrite.c | ||||
| -    inflate.c | ||||
| -    infback.c | ||||
| -    inftrees.c | ||||
| -    inffast.c | ||||
| -    trees.c | ||||
| -    uncompr.c | ||||
| -    zutil.c | ||||
| -) | ||||
| + | ||||
| +if(ARMv8) | ||||
| +    set(ZLIB_PRIVATE_HDRS | ||||
| +	crc32.h | ||||
| +	deflate.h | ||||
| +	gzguts.h | ||||
| +	inffast.h | ||||
| +	inffixed.h | ||||
| +	inflate.h | ||||
| +	inftrees.h | ||||
| +	trees.h | ||||
| +	zutil.h | ||||
| +	contrib/arm/chunkcopy.h | ||||
| +    ) | ||||
| +    set(ZLIB_SRCS | ||||
| +	adler32.c | ||||
| +	compress.c | ||||
| +	crc32.c | ||||
| +	deflate.c | ||||
| +	gzclose.c | ||||
| +	gzlib.c | ||||
| +	gzread.c | ||||
| +	gzwrite.c | ||||
| +	infback.c | ||||
| +	inftrees.c | ||||
| +	contrib/arm/inflate.c | ||||
| +	contrib/arm/inffast.c | ||||
| +	trees.c | ||||
| +	uncompr.c | ||||
| +	zutil.c | ||||
| +    ) | ||||
| +    else() | ||||
| +    set(ZLIB_PRIVATE_HDRS | ||||
| +	crc32.h | ||||
| +	deflate.h | ||||
| +	gzguts.h | ||||
| +	inffast.h | ||||
| +	inffixed.h | ||||
| +	inflate.h | ||||
| +	inftrees.h | ||||
| +	trees.h | ||||
| +	zutil.h | ||||
| +    ) | ||||
| +    set(ZLIB_SRCS | ||||
| +	adler32.c | ||||
| +	compress.c | ||||
| +	crc32.c | ||||
| +	deflate.c | ||||
| +	gzclose.c | ||||
| +	gzlib.c | ||||
| +	gzread.c | ||||
| +	gzwrite.c | ||||
| +	inflate.c | ||||
| +	infback.c | ||||
| +	inftrees.c | ||||
| +	inffast.c | ||||
| +	trees.c | ||||
| +	uncompr.c | ||||
| +	zutil.c | ||||
| +    ) | ||||
| +endif() | ||||
|   | ||||
|  if(NOT MINGW) | ||||
|      set(ZLIB_DLL_SRCS | ||||
| @@ -0,0 +1,14 @@ | ||||
| --- a/zlib.pc.cmakein | ||||
| +++ b/zlib.pc.cmakein | ||||
| @@ -1,8 +1,8 @@ | ||||
|  prefix=@CMAKE_INSTALL_PREFIX@ | ||||
|  exec_prefix=@CMAKE_INSTALL_PREFIX@ | ||||
| -libdir=@INSTALL_LIB_DIR@ | ||||
| -sharedlibdir=@INSTALL_LIB_DIR@ | ||||
| -includedir=@INSTALL_INC_DIR@ | ||||
| +libdir=${exec_prefix}/lib | ||||
| +sharedlibdir=${exec_prefix}/lib | ||||
| +includedir=${prefix}/include | ||||
|   | ||||
|  Name: zlib | ||||
|  Description: zlib compression library | ||||
		Reference in New Issue
	
	Block a user
	 domenico
					domenico