From: leslie <unknown>
Date: Tue, 21 Jul 2009 11:59:29 +0000 (+0800)
Subject: sasc: use unified FFdecsa
X-Git-Tag: upstream/620~255^2~3
X-Git-Url: http://www.vanbest.org/gitweb/?a=commitdiff_plain;h=2a03bf90f1afcc55cf6652a2610680f8ee1397de;p=sasc-ng.git

sasc: use unified FFdecsa
---

diff --git a/contrib/sasc-ng/FFdecsa/.empty b/contrib/sasc-ng/FFdecsa/.empty
new file mode 100644
index 0000000..e69de29
diff --git a/contrib/sasc-ng/FFdecsa/COPYING b/contrib/sasc-ng/FFdecsa/COPYING
deleted file mode 100644
index a43ea21..0000000
--- a/contrib/sasc-ng/FFdecsa/COPYING
+++ /dev/null
@@ -1,339 +0,0 @@
-		    GNU GENERAL PUBLIC LICENSE
-		       Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-                          675 Mass Ave, Cambridge, MA 02139, USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-			    Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users.  This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it.  (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.)  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-  To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must show them these terms so they know their
-rights.
-
-  We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-  Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software.  If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
-  Finally, any free program is threatened constantly by software
-patents.  We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary.  To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-		    GNU GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License.  The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language.  (Hereinafter, translation is included without limitation in
-the term "modification".)  Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-  1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-  2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) You must cause the modified files to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    b) You must cause any work that you distribute or publish, that in
-    whole or in part contains or is derived from the Program or any
-    part thereof, to be licensed as a whole at no charge to all third
-    parties under the terms of this License.
-
-    c) If the modified program normally reads commands interactively
-    when run, you must cause it, when started running for such
-    interactive use in the most ordinary way, to print or display an
-    announcement including an appropriate copyright notice and a
-    notice that there is no warranty (or else, saying that you provide
-    a warranty) and that users may redistribute the program under
-    these conditions, and telling the user how to view a copy of this
-    License.  (Exception: if the Program itself is interactive but
-    does not normally print such an announcement, your work based on
-    the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-    a) Accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of Sections
-    1 and 2 above on a medium customarily used for software interchange; or,
-
-    b) Accompany it with a written offer, valid for at least three
-    years, to give any third party, for a charge no more than your
-    cost of physically performing source distribution, a complete
-    machine-readable copy of the corresponding source code, to be
-    distributed under the terms of Sections 1 and 2 above on a medium
-    customarily used for software interchange; or,
-
-    c) Accompany it with the information you received as to the offer
-    to distribute corresponding source code.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form with such
-    an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it.  For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable.  However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License.  Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-  5. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Program or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-  6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-  7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded.  In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-  9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation.  If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-  10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission.  For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this.  Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-			    NO WARRANTY
-
-  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-		     END OF TERMS AND CONDITIONS
-
-	Appendix: How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) 19yy  <name of author>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-    Gnomovision version 69, Copyright (C) 19yy name of author
-    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-  `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-  <signature of Ty Coon>, 1 April 1989
-  Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Library General
-Public License instead of this License.
diff --git a/contrib/sasc-ng/FFdecsa/ChangeLog b/contrib/sasc-ng/FFdecsa/ChangeLog
deleted file mode 100644
index d1c4f0c..0000000
--- a/contrib/sasc-ng/FFdecsa/ChangeLog
+++ /dev/null
@@ -1,206 +0,0 @@
-- created
-
-- released 0.0.1
-
-- simplified s, A, B
-
-- released 0.0.2
-
-- simplified nxt=
-
-- released 0.0.3
-
-- removed commented code
-- code formatting
-
-- released 0.0.4
-
-- kk now unsigned char
-- removed 64 bit ints
-
-- released 0.0.5
-
-- created decrypt_2ts
-
-- released 0.0.6
-
-- renamed files
-- created decrypt_many_ts, removed others
-- external interface has 2 functions only: set_cws() and decrypt_many_ts()
-- reformatted code
-- reimplemented s12,s34,s56,s7
-- unsigned char become int for table optimization
-
-- released 0.0.7
-
-- optional icc compiler
-- kk now 0..55
-- decrypt_many_ts really works (no parallelism yet)
-- added get_cws() to interface
-- created stream.c
-- created key_schedule_stream, using iA[] and iB[]
-
-- released 0.0.8
-
-- decrypt_many_ts() makes a group, sorts the packets, processes them
-- preliminar stream_cypher_group() created
-- parallel computing activated
-- huge speed increase (+500%) thanks to stream_cypher_group()
-
-- released 0.0.9
-
-- block_cypher_group() created (no parallelism yet)
-
-- released 0.0.10
-
-- block_cypher_group() has 56 simple iterations
-- block_cypher_group() doesn't shift registers anymore
-
-- released 0.0.11
-
-- some parallelization on block_cypher_group()
-
-- released 0.0.12
-
-- better parallelization of block_cypher_group()
-
-- released 0.0.13
-
-- block_cypher() was still called by error when N=23
-- speed is now 109Mbit/s on AMD XP2000+ CPU
-
-- released 0.0.14
-
-- stream_cypher_group() has a init and normal variant
-- A[0]-A[9] instead of A[1]-A[10], same for B
-- implemented virtual shift of A and B
-- speed is now 117Mbit/s on AMD XP2000+ CPU
-
-- released 0.0.15
-
-- better optimization of E and F in the stream cypher
-- speed is now 119Mbit/s on AMD XP2000+ CPU
-
-- released 0.0.16
-
-- removed some debug overhead
-- speed is now 120Mbit/s on AMD XP2000+ CPU
-
-- released 0.0.17
-
-- don't move packets with residue anymore
-- speed is now 123Mbit/s on AMD XP2000+ CPU
-
-- released 0.0.18
-
-- solved alignment problems
-- search groupable packets even beyond ungroupable ones
-  (more speed in some real world cases)
-- created decrypt_many_ts2(), useful with circular buffers
-
-- released 0.0.19
-
-- removed old code
-
-- released 0.0.20
-
-- partially converted code to size-independent group
-- icc doesn't work with optimizations on
-
-- released 0.1.1
-
-- merge loops on block_decypher (speed++ gcc, speed-- icc)
-- transposition are now functions (speed-- icc)
-- icc works again (compiler bug work around?)
-
-- released 0.1.2
-
-- better use of COPY8 &co
-- better flags for gcc
-- removed old code
-
-- released 0.1.3
-
-- int and not char in block cypher (speed++++++ gcc, speed-- icc)
-
-- released 0.1.4
-
-- group abstraction finally implemented
-- support for group width 64
-
-- released 0.1.5
-
-- group 64 mmx implemented (speed++ gcc)
-
-- released 0.1.6
-
-- more parallelism in block cypher (speed++ gcc)
-- transposition before and after block (disabled because of no speed gain yet)
-
-- released 0.1.7
-
-- more parallelism in block cypher (speed++ gcc)
-- transposition before and after block enabled (speed++ gcc)
-- gcc options (unrolled 500) speed gcc++
-
-- released 0.1.8
-
-- reworked FFN_ALL_* constants (speed++++ gcc) 
-
-- released 0.1.9
-
-- transposition in block as inlined functions
-- group abstraction working well
-
-- released 0.1.10
-
-- group 128 sse implemented, but batch is 64 mmx (not faster than group 64 mmx)
-
-- released 0.1.11
-
-- lot of code polishing and dead code elimination
-- better and more debug output
-
-- released 0.1.12
-
-- name change: FFdecsa
-
-- released 0.2.0
-
-- separated test cases
-- corrected all group_modes (now called parallel_modes)
-- parallel 128 8 char implemented
-- parallel 64 long implemented
-- parallel 128 2 long implemented
-- parallel 128 2 mmx implemented (incredibly slow, the compiler is very confused)
-- parallel 128 16 charA implemented (very slow compilation)
-- parallel 128 16 char implemented
-- renamed softcsa* to FFdecsa*
-
-- released 0.2.1
-
-- new external interface (based on ranges)
-
-- released 0.2.2
-
-- can be compiled with g++ too
-- using g++ the code is 3% faster!
-- external interface: function name changing and new functions
-- a group of ranges is now called a cluster
-- renamed autogenerated files
-
-- released 0.2.3
-
-- written docs
-- removed unneeded files
-- added Copyright and license notes
-- reworked "logic"
-
-- released 0.3.0
-
-- Makefile reworked
-- misc fixes
-- added vdr patch
-
-- released 1.0.0 (public release)
-
diff --git a/contrib/sasc-ng/FFdecsa/FFdecsa.c b/contrib/sasc-ng/FFdecsa/FFdecsa.c
deleted file mode 100644
index 8ba7322..0000000
--- a/contrib/sasc-ng/FFdecsa/FFdecsa.c
+++ /dev/null
@@ -1,878 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include <sys/types.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "FFdecsa.h"
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-//#define DEBUG
-#ifdef DEBUG
-#define DBG(a) a
-#else
-#define DBG(a)
-#endif
-
-//// parallelization stuff, large speed differences are possible
-// possible choices
-#define PARALLEL_32_4CHAR     320
-#define PARALLEL_32_4CHARA    321
-#define PARALLEL_32_INT       322
-#define PARALLEL_64_8CHAR     640
-#define PARALLEL_64_8CHARA    641
-#define PARALLEL_64_2INT      642
-#define PARALLEL_64_LONG      643
-#define PARALLEL_64_MMX       644
-#define PARALLEL_128_16CHAR  1280
-#define PARALLEL_128_16CHARA 1281
-#define PARALLEL_128_4INT    1282
-#define PARALLEL_128_2LONG   1283
-#define PARALLEL_128_2MMX    1284
-#define PARALLEL_128_SSE     1285
-#define PARALLEL_128_SSE2    1286
-
-//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
-#ifndef PARALLEL_MODE
-#define PARALLEL_MODE PARALLEL_32_INT
-#endif
-//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
-
-#include "parallel_generic.h"
-//// conditionals
-#if PARALLEL_MODE==PARALLEL_32_4CHAR
-#include "parallel_032_4char.h"
-#elif PARALLEL_MODE==PARALLEL_32_4CHARA
-#include "parallel_032_4charA.h"
-#elif PARALLEL_MODE==PARALLEL_32_INT
-#include "parallel_032_int.h"
-#elif PARALLEL_MODE==PARALLEL_64_8CHAR
-#include "parallel_064_8char.h"
-#elif PARALLEL_MODE==PARALLEL_64_8CHARA
-#include "parallel_064_8charA.h"
-#elif PARALLEL_MODE==PARALLEL_64_2INT
-#include "parallel_064_2int.h"
-#elif PARALLEL_MODE==PARALLEL_64_LONG
-#include "parallel_064_long.h"
-#elif PARALLEL_MODE==PARALLEL_64_MMX
-#include "parallel_064_mmx.h"
-#elif PARALLEL_MODE==PARALLEL_128_16CHAR
-#include "parallel_128_16char.h"
-#elif PARALLEL_MODE==PARALLEL_128_16CHARA
-#include "parallel_128_16charA.h"
-#elif PARALLEL_MODE==PARALLEL_128_4INT
-#include "parallel_128_4int.h"
-#elif PARALLEL_MODE==PARALLEL_128_2LONG
-#include "parallel_128_2long.h"
-#elif PARALLEL_MODE==PARALLEL_128_2MMX
-#include "parallel_128_2mmx.h"
-#elif PARALLEL_MODE==PARALLEL_128_SSE
-#include "parallel_128_sse.h"
-#elif PARALLEL_MODE==PARALLEL_128_SSE2
-#include "parallel_128_sse2.h"
-#else
-#error "unknown/undefined parallel mode"
-#endif
-
-// stuff depending on conditionals
-
-#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
-#define BYPG BYTES_PER_GROUP
-#define BITS_PER_GROUP GROUP_PARALLELISM
-#define BIPG BITS_PER_GROUP
-
-#ifndef MALLOC
-#define MALLOC(X) malloc(X)
-#endif
-#ifndef FREE
-#define FREE(X) free(X)
-#endif
-#ifndef MEMALIGN
-#define MEMALIGN
-#endif
-
-//// debug tool
-
-static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
-  int i;
-  for(i=0;i<len;i++){
-    if(i%linelen==0&&i) fprintf(stderr,"\n");
-    if(i%linelen==0) fprintf(stderr,"%s %08x:",string,i);
-    else{
-      if(i%8==0) fprintf(stderr," ");
-      if(i%4==0) fprintf(stderr," ");
-    }
-    fprintf(stderr," %02x",p[i]);
-  }
-  if(i%linelen==0) fprintf(stderr,"\n");
-}
-
-//////////////////////////////////////////////////////////////////////////////////
-
-struct csa_key_t{
-	unsigned char ck[8];
-// used by stream
-        int iA[8];  // iA[0] is for A1, iA[7] is for A8
-        int iB[8];  // iB[0] is for B1, iB[7] is for B8
-// used by stream (group)
-        MEMALIGN group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
-        MEMALIGN group iA_g[8][4]; // [0 for A1][0 for LSB]
-        MEMALIGN group iB_g[8][4]; // [0 for B1][0 for LSB]
-// used by block
-	unsigned char kk[56];
-// used by block (group)
-	MEMALIGN batch kkmulti[56]; // many times the same byte in every batch
-};
-
-struct csa_keys_t{
-  struct csa_key_t even;
-  struct csa_key_t odd;
-};
-
-//-----stream cypher
-
-//-----key schedule for stream decypher
-static void key_schedule_stream(
-  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes   | Key.
-  int *iA,              // [Out] iA[0]-iA[7]   8 nibbles | Key schedule.
-  int *iB)              // [Out] iB[0]-iB[7]   8 nibbles | Key schedule.
-{
-    iA[0]=(ck[0]>>4)&0xf;
-    iA[1]=(ck[0]   )&0xf;
-    iA[2]=(ck[1]>>4)&0xf;
-    iA[3]=(ck[1]   )&0xf;
-    iA[4]=(ck[2]>>4)&0xf;
-    iA[5]=(ck[2]   )&0xf;
-    iA[6]=(ck[3]>>4)&0xf;
-    iA[7]=(ck[3]   )&0xf;
-    iB[0]=(ck[4]>>4)&0xf;
-    iB[1]=(ck[4]   )&0xf;
-    iB[2]=(ck[5]>>4)&0xf;
-    iB[3]=(ck[5]   )&0xf;
-    iB[4]=(ck[6]>>4)&0xf;
-    iB[5]=(ck[6]   )&0xf;
-    iB[6]=(ck[7]>>4)&0xf;
-    iB[7]=(ck[7]   )&0xf;
-}
-
-//----- stream main function
-
-#define STREAM_INIT
-#include "stream.c"
-#undef STREAM_INIT
-
-#define STREAM_NORMAL
-#include "stream.c"
-#undef STREAM_NORMAL
-
-
-//-----block decypher
-
-//-----key schedule for block decypher
-
-static void key_schedule_block(
-  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes | Key.
-  unsigned char *kk)    // [Out] kk[0]-kk[55] 56 bytes | Key schedule.
-{
-  static const unsigned char key_perm[0x40] = {
-    0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
-    0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
-    0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
-    0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
-  };
-
-  int i,j,k;
-  int bit[64];
-  int newbit[64];
-  int kb[7][8];
-
-  // 56 steps
-  // 56 key bytes kk(55)..kk(0) by key schedule from ck
-
-  // kb(6,0) .. kb(6,7) = ck(0) .. ck(7)
-  kb[6][0] = ck[0];
-  kb[6][1] = ck[1];
-  kb[6][2] = ck[2];
-  kb[6][3] = ck[3];
-  kb[6][4] = ck[4];
-  kb[6][5] = ck[5];
-  kb[6][6] = ck[6];
-  kb[6][7] = ck[7];
-
-  // calculate kb[5] .. kb[0]
-  for(i=5; i>=0; i--){
-    // 64 bit perm on kb
-    for(j=0; j<8; j++){
-      for(k=0; k<8; k++){
-        bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1;
-        newbit[key_perm[j*8+k]-1] = bit[j*8+k];
-      }
-    }
-    for(j=0; j<8; j++){
-      kb[i][j] = 0;
-      for(k=0; k<8; k++){
-        kb[i][j] |= newbit[j*8+k] << (7-k);
-      }
-    }
-  }
-
-  // xor to give kk
-  for(i=0; i<7; i++){
-    for(j=0; j<8; j++){
-      kk[i*8+j] = kb[i][j] ^ i;
-    }
-  }
-
-}
-
-//-----block utils
-
-static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
-  int *ri=(int *)in;
-  int *ibi=(int *)out;
-  int j,i,k,g;
-  // copy and first step
-  for(g=0;g<count;g++){
-    ri[g]=ibi[2*g];
-    ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
-  }
-//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01230123
-#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
-  for(j=0;j<8;j+=4){
-    for(i=0;i<2;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+2)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x0000ffff)      | ((b           )<<16);
-        ri[INTS_PER_ROW*(j+i+2)+k]=  ((t           )>>16) |  (b&0xffff0000) ;
-      }
-    }
-  }
-//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01010101
-  for(j=0;j<8;j+=2){
-    for(i=0;i<1;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+1)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-        ri[INTS_PER_ROW*(j+i+1)+k]=  ((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-      }
-    }
-  }
-//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 00000000
-}
-
-static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
-  int *ri=(int *)in;
-  int *bdi=(int *)out;
-  int j,i,k,g;
-#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
-//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 00000000
-  for(j=0;j<8;j+=2){
-    for(i=0;i<1;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+1)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-        ri[INTS_PER_ROW*(j+i+1)+k]=  ((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-      }
-    }
-  }
-//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01010101
-  for(j=0;j<8;j+=4){
-    for(i=0;i<2;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+2)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x0000ffff)      | ((b           )<<16);
-        ri[INTS_PER_ROW*(j+i+2)+k]=  ((t           )>>16) |  (b&0xffff0000) ;
-      }
-    }
-  }
-//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01230123
-  for(g=0;g<count;g++){
-    bdi[2*g]=ri[g];
-    bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
-  }
-}
-
-//-----block main function
-
-// block group
-static void block_decypher_group(
-  batch *kkmulti,       // [In]  kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
-  unsigned char *ib,    // [In]  (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
-  unsigned char *bd,    // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
-  int count)
-{
-  // int is faster than unsigned char. apparently not
-  static const unsigned char block_sbox[0x100] = {
-    0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
-    0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
-    0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
-    0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
-    0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
-    0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
-    0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
-    0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
-
-    0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
-    0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
-    0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
-    0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
-    0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
-    0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
-    0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
-    0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
-  };
-  MEMALIGN unsigned char r[GROUP_PARALLELISM*(8+56)];  /* 56 because we will move back in memory while looping */
-  MEMALIGN unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
-  int roff;
-  int i,g,count_all=GROUP_PARALLELISM;
-
-  roff=GROUP_PARALLELISM*56;
-
-#define FASTTRASP1
-#ifndef FASTTRASP1
-  for(g=0;g<count;g++){
-    // Init registers 
-    int j;
-    for(j=0;j<8;j++){
-      r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
-    }
-  }
-#else
-  trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
-#endif
-//dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-
-  // loop over kk[55]..kk[0]
-  for(i=55;i>=0;i--){
-    {
-      MEMALIGN batch tkkmulti=kkmulti[i];
-      batch *si=(batch *)sbox_in;
-      batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
-      for(g=0;g<count_all/BYTES_PER_BATCH;g++){
-        si[g]=B_FFXOR(tkkmulti,r6_N[g]);              //FIXME: introduce FASTBATCH?
-      }
-    }
-
-    // table lookup, this works on only one byte at a time
-    // most difficult part of all
-    // - can't be parallelized
-    // - can't be synthetized through boolean terms (8 input bits are too many)
-    for(g=0;g<count_all;g++){
-      sbox_out[g]=block_sbox[sbox_in[g]];
-    }
-
-    // bit permutation
-    {
-      unsigned char *po=(unsigned char *)perm_out;
-      unsigned char *so=(unsigned char *)sbox_out;
-//dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
-      for(g=0;g<count_all;g+=BYTES_PER_BATCH){
-        MEMALIGN batch in,out;
-        in=*(batch *)&so[g];
-
-        out=B_FFOR(
-	    B_FFOR(
-	    B_FFOR(
-	    B_FFOR(
-	    B_FFOR(
-	           B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
-	           B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
-	           B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
-	           B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
-	           B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
-	           B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
-
-        *(batch *)&po[g]=out;
-      }
-//dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
-    }
-
-    roff-=GROUP_PARALLELISM; /* virtual shift of registers */
-
-#if 0
-/* one by one */
-    for(g=0;g<count_all;g++){
-      r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
-      r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
-      r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
-      r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
-      r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
-    }
-#else
-    for(g=0;g<count_all;g+=BEST_SPAN){
-      XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
-    }
-#endif
-  }
-
-#define FASTTRASP2
-#ifndef FASTTRASP2
-  for(g=0;g<count;g++){
-    // Copy results
-    int j;
-    for(j=0;j<8;j++){
-      bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
-    }
-  }
-#else
-  trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
-#endif
-}
-
-//-----------------------------------EXTERNAL INTERFACE
-
-//-----get internal parallelism
-
-int get_internal_parallelism(void){
-  return GROUP_PARALLELISM;
-}
-
-//-----get suggested cluster size
-
-int get_suggested_cluster_size(void){
-  int r;
-  r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
-  if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
-  return r;
-}
-
-//-----key structure
-
-void *get_key_struct(void){
-  struct csa_keys_t *keys=(struct csa_keys_t *)MALLOC(sizeof(struct csa_keys_t));
-  if(keys) {
-    static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 };
-    set_control_words(keys,pk,pk);
-    }
-  return keys;
-}
-
-void free_key_struct(void *keys){
-  return FREE(keys);
-}
-
-//-----set control words
-
-static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
-  // could be made faster, but is not run often
-  int bi,by;
-  int i,j;
-// key
-  memcpy(key->ck,pk,8);
-// precalculations for stream
-  key_schedule_stream(key->ck,key->iA,key->iB);
-  for(by=0;by<8;by++){
-    for(bi=0;bi<8;bi++){
-      key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
-    }
-  }
-  for(by=0;by<8;by++){
-    for(bi=0;bi<4;bi++){
-      key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
-      key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
-    }
-  }
-// precalculations for block
-  key_schedule_block(key->ck,key->kk);
-  for(i=0;i<56;i++){
-    for(j=0;j<BYTES_PER_BATCH;j++){
-      *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
-    }
-  }
-}
-
-void set_control_words(void *keys, const unsigned char *ev, const unsigned char *od){
-  schedule_key(&((struct csa_keys_t *)keys)->even,ev);
-  schedule_key(&((struct csa_keys_t *)keys)->odd,od);
-}
-
-void set_even_control_word(void *keys, const unsigned char *pk){
-  schedule_key(&((struct csa_keys_t *)keys)->even,pk);
-}
-
-void set_odd_control_word(void *keys, const unsigned char *pk){
-  schedule_key(&((struct csa_keys_t *)keys)->odd,pk);
-}
-
-//-----get control words
-
-void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
-  memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8);
-  memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8);
-}
-
-//----- decrypt
-
-int decrypt_packets(void *keys, unsigned char **cluster){
-  // statistics, currently unused
-  int stat_no_scramble=0;
-  int stat_reserved=0;
-  int stat_decrypted[2]={0,0};
-  int stat_decrypted_mini=0;
-  unsigned char **clst;
-  unsigned char **clst2;
-  int grouped;
-  int group_ev_od;
-  int advanced;
-  int can_advance;
-  unsigned char *g_pkt[GROUP_PARALLELISM];
-  int g_len[GROUP_PARALLELISM];
-  int g_offset[GROUP_PARALLELISM];
-  int g_n[GROUP_PARALLELISM];
-  int g_residue[GROUP_PARALLELISM];
-  unsigned char *pkt;
-  int xc0,ev_od,len,offset,n,residue;
-  struct csa_key_t* k;
-  int i,j,iter,g;
-  int t23,tsmall;
-  int alive[24];
-//icc craziness  int pad1=0; //////////align! FIXME
-  unsigned char *encp[GROUP_PARALLELISM];
-  unsigned char stream_in[GROUP_PARALLELISM*8];
-  unsigned char stream_out[GROUP_PARALLELISM*8];
-  MEMALIGN unsigned char ib[GROUP_PARALLELISM*8];
-  MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8];
-  struct stream_regs regs;
-
-//icc craziness  i=(int)&pad1;//////////align!!! FIXME
-
-  // build a list of packets to be processed
-  clst=cluster;
-  grouped=0;
-  advanced=0;
-  can_advance=1;
-  group_ev_od=-1; // silence incorrect compiler warning
-  pkt=*clst;
-  do{ // find a new packet
-    if(grouped==GROUP_PARALLELISM){
-      // full
-      break;
-    }
-    if(pkt==NULL){
-      // no more ranges
-      break;
-    }
-    if(pkt>=*(clst+1)){
-      // out of this range, try next
-      clst++;clst++;
-      pkt=*clst;
-      continue;
-    }
-
-    do{ // handle this packet
-      xc0=pkt[3]&0xc0;
-      DBG(fprintf(stderr,"   exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
-      if(xc0==0x00){
-        DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
-        advanced+=can_advance;
-        stat_no_scramble++;
-        break;
-      }
-      if(xc0==0x40){
-        DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
-        advanced+=can_advance;
-        stat_reserved++;
-        break;
-      }
-      if(xc0==0x80||xc0==0xc0){ // encrypted
-        ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
-        if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
-        if(group_ev_od==ev_od){ // could be added to group
-          pkt[3]&=0x3f;  // consider it decrypted now
-          if(pkt[3]&0x20){ // incomplete packet
-            offset=4+pkt[4]+1;
-            len=188-offset;
-            n=len>>3;
-            residue=len-(n<<3);
-            if(n==0){ // decrypted==encrypted!
-              DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
-              advanced+=can_advance;
-              stat_decrypted_mini++;
-              break; // this doesn't need more processing
-            }
-          }else{
-            len=184;
-            offset=4;
-            n=23;
-            residue=0;
-          }
-          g_pkt[grouped]=pkt;
-          g_len[grouped]=len;
-          g_offset[grouped]=offset;
-          g_n[grouped]=n;
-          g_residue[grouped]=residue;
-          DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
-          grouped++;
-          advanced+=can_advance;
-          stat_decrypted[ev_od]++;
-        }
-        else{
-          can_advance=0;
-          DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
-          break; // skip and go on
-        }
-      }
-    } while(0);
-
-    if(can_advance){
-      // move range start forward
-      *clst+=188;
-    }
-    // next packet, if there is one
-    pkt+=188;
-  } while(1);
-  DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
-
-  // delete empty ranges and compact list
-  clst2=cluster;
-  for(clst=cluster;*clst!=NULL;clst+=2){
-    // if not empty
-    if(*clst<*(clst+1)){
-      // it will remain 
-      *clst2=*clst;
-      *(clst2+1)=*(clst+1);
-      clst2+=2;
-    }
-  }
-  *clst2=NULL;
-
-  if(grouped==0){
-    // no processing needed
-    return advanced;
-  }
-
-  //  sort them, longest payload first
-  //  we expect many n=23 packets and a few n<23
-  DBG(fprintf(stderr,"PRESORTING\n"));
-  for(i=0;i<grouped;i++){
-    DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
-    }
-  // grouped is always <= GROUP_PARALLELISM
-
-#define g_swap(a,b) \
-    pkt=g_pkt[a]; \
-    g_pkt[a]=g_pkt[b]; \
-    g_pkt[b]=pkt; \
-\
-    len=g_len[a]; \
-    g_len[a]=g_len[b]; \
-    g_len[b]=len; \
-\
-    offset=g_offset[a]; \
-    g_offset[a]=g_offset[b]; \
-    g_offset[b]=offset; \
-\
-    n=g_n[a]; \
-    g_n[a]=g_n[b]; \
-    g_n[b]=n; \
-\
-    residue=g_residue[a]; \
-    g_residue[a]=g_residue[b]; \
-    g_residue[b]=residue;
-
-  // step 1: move n=23 packets before small packets
-  t23=0;
-  tsmall=grouped-1;
-  for(;;){
-    for(;t23<grouped;t23++){
-      if(g_n[t23]!=23) break;
-    }
-DBG(fprintf(stderr,"t23 after for =%i\n",t23));
-    
-    for(;tsmall>=0;tsmall--){
-      if(g_n[tsmall]==23) break;
-    }
-DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
-    
-    if(tsmall-t23<1) break;
-    
-DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
-
-    g_swap(t23,tsmall);
-
-    t23++;
-    tsmall--;
-DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
-  }
-  DBG(fprintf(stderr,"packets with n=23, t23=%i   grouped=%i\n",t23,grouped));
-  DBG(fprintf(stderr,"MIDSORTING\n"));
-  for(i=0;i<grouped;i++){
-    DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
-    }
-
-  // step 2: sort small packets in decreasing order of n (bubble sort is enough)
-  for(i=t23;i<grouped;i++){
-    for(j=i+1;j<grouped;j++){
-      if(g_n[j]>g_n[i]){
-        g_swap(i,j);
-      }
-    }
-  }
-  DBG(fprintf(stderr,"POSTSORTING\n"));
-  for(i=0;i<grouped;i++){
-    DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
-    }
-
-  // we need to know how many packets need 23 iterations, how many 22...
-  for(i=0;i<=23;i++){
-    alive[i]=0;
-  }
-  // count
-  alive[23-1]=t23;
-  for(i=t23;i<grouped;i++){
-    alive[g_n[i]-1]++;
-  }
-  // integrate
-  for(i=22;i>=0;i--){
-    alive[i]+=alive[i+1];
-  }
-  DBG(fprintf(stderr,"ALIVE\n"));
-  for(i=0;i<=23;i++){
-    DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
-    }
-
-  // choose key
-  if(group_ev_od==0){
-    k=&((struct csa_keys_t *)keys)->even;
-  }
-  else{
-    k=&((struct csa_keys_t *)keys)->odd;
-  }
-
-  //INIT
-//#define INITIALIZE_UNUSED_INPUT
-#ifdef INITIALIZE_UNUSED_INPUT
-// unnecessary zeroing.
-// without this, we operate on uninitialized memory
-// when grouped<GROUP_PARALLELISM, but it's not a problem,
-// as final results will be discarded.
-// random data makes debugging sessions difficult.
-  for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
-DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
-#else
-DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
-#endif
-
-  for(g=0;g<grouped;g++){
-    encp[g]=g_pkt[g];
-    DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
-    encp[g]+=g_offset[g]; // skip header
-    FFTABLEIN(stream_in,g,encp[g]);
-  }
-//dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
-
-
-  // ITER 0
-DBG(fprintf(stderr,">>>>>ITER 0\n"));
-  iter=0;
-  stream_cypher_group_init(&regs,k->iA_g,k->iB_g,stream_in);
-  // fill first ib
-  for(g=0;g<alive[iter];g++){
-    COPY_8_BY(ib+8*g,encp[g]);
-  }
-DBG(dump_mem("IB ",ib,8*alive[iter],8));
-  // ITER 1..N-1
-  for (iter=1;iter<23&&alive[iter-1]>0;iter++){
-DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
-    // alive and just dead packets: calc block
-    block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
-DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
-    // all packets (dead too): calc stream
-    stream_cypher_group_normal(&regs,stream_out);
-//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
-
-    // alive packets: calc ib
-    for(g=0;g<alive[iter];g++){
-      FFTABLEOUT(ib+8*g,stream_out,g);
-DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
-// XOREQ8BY gcc bug? 2x4 ok, 8 ko    UPDATE: result ok but speed 1-2% slower (!!!???)
-#if 1
-      XOREQ_4_BY(ib+8*g,encp[g]+8);
-      XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
-#else
-      XOREQ_8_BY(ib+8*g,encp[g]+8);
-#endif
-DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
-    }
-    // alive packets: decrypt data
-    for(g=0;g<alive[iter];g++){
-DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
-      XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
-DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
-    }
-    // just dead packets: write decrypted data
-    for(g=alive[iter];g<alive[iter-1];g++){
-DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
-      COPY_8_BY(encp[g],block_out+8*g);
-DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
-    }
-    // just dead packets: decrypt residue
-    for(g=alive[iter];g<alive[iter-1];g++){
-DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
-      FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
-DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
-    }
-    // alive packets: pointers++
-    for(g=0;g<alive[iter];g++) encp[g]+=8;
-  };
-  // ITER N
-DBG(fprintf(stderr,">>>>>ITER 23\n"));
-  iter=23;
-  // calc block
-  block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
-DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
-  // just dead packets: write decrypted data
-  for(g=alive[iter];g<alive[iter-1];g++){
-DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
-    COPY_8_BY(encp[g],block_out+8*g);
-DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
-  }
-  // no residue possible
-  // so do nothing
-
-  DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
-
-  M_EMPTY(); // restore CPU multimedia state
-
-  return advanced;
-}
diff --git a/contrib/sasc-ng/FFdecsa/FFdecsa.h b/contrib/sasc-ng/FFdecsa/FFdecsa.h
deleted file mode 100644
index 1be08e7..0000000
--- a/contrib/sasc-ng/FFdecsa/FFdecsa.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#ifndef FFDECSA_H
-#define FFDECSA_H
-
-//----- public interface
-
-// -- how many packets can be decrypted at the same time
-// This is an info about internal decryption parallelism.
-// You should try to call decrypt_packets with more packets than the number
-// returned here for performance reasons (use get_suggested_cluster_size to know
-// how many).
-int get_internal_parallelism(void);
-
-// -- how many packets you should have in a cluster when calling decrypt_packets
-// This is a suggestion to achieve optimal performance; typically a little
-// higher than what get_internal_parallelism returns.
-// Passing less packets could slow down the decryption.
-// Passing more packets is never bad (if you don't spend a lot of time building
-// the list).
-int get_suggested_cluster_size(void);
-
-// -- alloc & free the key structure
-void *get_key_struct(void);
-void free_key_struct(void *keys);
-
-// -- set control words, 8 bytes each
-void set_control_words(void *keys, const unsigned char *even, const unsigned char *odd);
-
-// -- set even control word, 8 bytes
-void set_even_control_word(void *keys, const unsigned char *even);
-
-// -- set odd control word, 8 bytes
-void set_odd_control_word(void *keys, const unsigned char *odd);
-
-// -- get control words, 8 bytes each
-//void get_control_words(void *keys, unsigned char *even, unsigned char *odd);
-
-// -- decrypt many TS packets
-// This interface is a bit complicated because it is designed for maximum speed.
-// Please read doc/how_to_use.txt.
-int decrypt_packets(void *keys, unsigned char **cluster);
-
-#endif
diff --git a/contrib/sasc-ng/FFdecsa/FFdecsa_test.c b/contrib/sasc-ng/FFdecsa/FFdecsa_test.c
deleted file mode 100644
index dfaa93d..0000000
--- a/contrib/sasc-ng/FFdecsa/FFdecsa_test.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include <string.h>
-#include <stdio.h>
-#include <sys/time.h>
-
-#include "FFdecsa.h"
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-#include "FFdecsa_test_testcases.h"
-
-int compare(unsigned char *p1, unsigned char *p2, int n, int silently){
-  int i;
-  int ok=1;
-  for(i=0;i<n;i++){
-    if(i==3) continue; // tolerate this
-    if(p1[i]!=p2[i]){
-      fprintf(stderr,"at pos 0x%02x, got 0x%02x instead of 0x%02x\n",i,p1[i],p2[i]);
-      ok=0;
-    }
-  }
-  if(!silently){
-    if(ok){
-       fprintf(stderr,"CORRECT!\n");
-    }
-    else{
-       fprintf(stderr,"FAILED!\n");
-    }
-  }
-  return ok;
-}
-
-
-//MAIN
-
-#define TS_PKTS_FOR_TEST 30*1000
-//#define TS_PKTS_FOR_TEST 1000*1000
-unsigned char megabuf[188*TS_PKTS_FOR_TEST];
-unsigned char onebuf[188];
-
-unsigned char *cluster[10];
-
-int main(void){
-  int i;
-  struct timeval tvs,tve;
-  void *keys=get_key_struct();
-
-  fprintf(stderr,"FFdecsa 1.0: testing correctness and speed\n");
-
-/* begin correctness testing */
-
-  set_control_words(keys,test_invalid_key,test_1_key);
-  memcpy(onebuf,test_1_encrypted,188);
-  cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
-  decrypt_packets(keys,cluster);
-  compare(onebuf,test_1_expected,188,0);
-
-  set_control_words(keys,test_2_key,test_invalid_key);
-  memcpy(onebuf,test_2_encrypted,188);
-  cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
-  decrypt_packets(keys,cluster);
-  compare(onebuf,test_2_expected,188,0);
-
-  set_control_words(keys,test_3_key,test_invalid_key);
-  memcpy(onebuf,test_3_encrypted,188);
-  cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
-  decrypt_packets(keys,cluster);
-  compare(onebuf,test_3_expected,188,0);
-
-  set_control_words(keys,test_p_10_0_key,test_invalid_key);
-  memcpy(onebuf,test_p_10_0_encrypted,188);
-  cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
-  decrypt_packets(keys,cluster);
-  compare(onebuf,test_p_10_0_expected,188,0);
-
-  set_control_words(keys,test_p_1_6_key,test_invalid_key);
-  memcpy(onebuf,test_p_1_6_encrypted,188);
-  cluster[0]=onebuf;cluster[1]=onebuf+188;cluster[2]=NULL;
-  decrypt_packets(keys,cluster);
-  compare(onebuf,test_p_1_6_expected,188,0);
-
-/* begin speed testing */
-
-#if 0
-// test on short packets
-#define s_encrypted test_p_1_6_encrypted
-#define s_key_e     test_p_1_6_key
-#define s_key_o     test_invalid_key
-#define s_expected  test_p_1_6_expected
-
-#else
-//test on full packets
-#define s_encrypted test_2_encrypted
-#define s_key_e     test_2_key
-#define s_key_o     test_invalid_key
-#define s_expected  test_2_expected
-
-#endif
-
-  for(i=0;i<TS_PKTS_FOR_TEST;i++){
-    memcpy(&megabuf[188*i],s_encrypted,188);
-  }
-// test that packets are not shuffled around
-// so, let's put an undecryptable packet somewhere in the middle (we will use a wrong key)
-#define noONE_POISONED_PACKET
-#ifdef ONE_POISONED_PACKET
-  memcpy(&megabuf[188*(TS_PKTS_FOR_TEST*2/3)],test_3_encrypted,188);
-#endif
-
-  // start decryption
-  set_control_words(keys,s_key_e,s_key_o);
-  gettimeofday(&tvs,NULL);
-#if 0
-// force one by one
-  for(i=0;i<TS_PKTS_FOR_TEST;i++){
-    cluster[0]=megabuf+188*i;cluster[1]=onebuf+188*i+188;cluster[2]=NULL;
-    decrypt_packets(keys,cluster);
-  }
-#else
-  {
-    int done=0;
-    while(done<TS_PKTS_FOR_TEST){
-      //fprintf(stderr,"done=%i\n",done);
-      cluster[0]=megabuf+188*done;cluster[1]=megabuf+188*TS_PKTS_FOR_TEST;cluster[2]=NULL;
-      done+=decrypt_packets(keys,cluster);
-    }
-  }
-#endif
-  gettimeofday(&tve,NULL);
-  //end decryption
-
-  fprintf(stderr,"speed=%f Mbit/s\n",(184*TS_PKTS_FOR_TEST*8)/((tve.tv_sec-tvs.tv_sec)+1e-6*(tve.tv_usec-tvs.tv_usec))/1000000);
-  fprintf(stderr,"speed=%f pkts/s\n",TS_PKTS_FOR_TEST/((tve.tv_sec-tvs.tv_sec)+1e-6*(tve.tv_usec-tvs.tv_usec)));
-
-  // this packet couldn't be decrypted correctly
-#ifdef ONE_POISONED_PACKET
-  compare(megabuf+188*(TS_PKTS_FOR_TEST*2/3),test_3_expected,188,0); /* will fail because we used a wrong key */
-#endif
-  // these should be ok
-  compare(megabuf,s_expected,188,0);
-  compare(megabuf+188*511,s_expected,188,0);
-  compare(megabuf+188*512,s_expected,188,0);
-  compare(megabuf+188*319,s_expected,188,0);
-  compare(megabuf+188*(TS_PKTS_FOR_TEST-1),s_expected,188,0);
-
-  for(i=0;i<TS_PKTS_FOR_TEST;i++){
-    if(!compare(megabuf+188*i,s_expected,188,1)){
-      fprintf(stderr,"FAILED COMPARISON OF PACKET %10i\n",i);
-    };
-  }
-
-  return 0;
-}
diff --git a/contrib/sasc-ng/FFdecsa/FFdecsa_test_testcases.h b/contrib/sasc-ng/FFdecsa/FFdecsa_test_testcases.h
deleted file mode 100644
index 62d3f94..0000000
--- a/contrib/sasc-ng/FFdecsa/FFdecsa_test_testcases.h
+++ /dev/null
@@ -1,279 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-// TEST DATA
-
-////////// used as a wrong key
-unsigned char test_invalid_key[0x08] = {
-    0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78
-};
-
-
-////////// test 1: odd key
-unsigned char test_1_key[0x8] = {
-    0x07, 0xe0, 0x1b, 0x02, 0xc9, 0xe0, 0x45, 0xee
-};
-unsigned char test_1_encrypted[0x100] = {
-    0x47, 0x00, 0x00, 0xd0,
-    0xde, 0xcf, 0x0a, 0x0d, 0xb2, 0xd7, 0xc4, 0x40, 0xde, 0x5d, 0x63, 0x18, 0x5a, 0x98, 0x17, 0xaa,
-    0xc9, 0xbc, 0x27, 0xc6, 0xcb, 0x49, 0x40, 0x48, 0xfd, 0x20, 0xb7, 0x05, 0x5b, 0x27, 0xcb, 0xeb,
-    0x9a, 0xf0, 0xac, 0x45, 0x6d, 0x56, 0xf4, 0x7b, 0x6f, 0xa0, 0x57, 0xf3, 0x9b, 0xf7, 0xa2, 0xc7,
-    0xd4, 0x68, 0x24, 0x00, 0x2f, 0x28, 0x13, 0x96, 0x94, 0xa8, 0x7c, 0xf4, 0x6f, 0x07, 0x2a, 0x0e,
-    0xe8, 0xa1, 0xeb, 0xc7, 0x80, 0xac, 0x1f, 0x79, 0xbf, 0x5d, 0xb6, 0x10, 0x7c, 0x2e, 0x52, 0xe9,
-    0x34, 0x2c, 0xa8, 0x39, 0x01, 0x73, 0x04, 0x24, 0xa8, 0x1e, 0xdb, 0x5b, 0xcb, 0x24, 0xf6, 0x31,
-    0xab, 0x02, 0x6b, 0xf9, 0xf6, 0xf7, 0xe9, 0x52, 0xad, 0xcf, 0x62, 0x0f, 0x42, 0xf6, 0x66, 0x5d,
-    0xc0, 0x86, 0xf2, 0x7b, 0x40, 0x20, 0xa9, 0xbd, 0x1f, 0xfd, 0x16, 0xad, 0x2e, 0x75, 0xa6, 0xa0,
-    0x85, 0xf3, 0x9c, 0x31, 0x20, 0x4e, 0xfb, 0x95, 0x61, 0x78, 0xce, 0x10, 0xc1, 0x48, 0x5f, 0xd3,
-    0x61, 0x05, 0x12, 0xf4, 0xe2, 0x04, 0xae, 0xe0, 0x86, 0x01, 0x56, 0x55, 0xb1, 0x0f, 0xa6, 0x33,
-    0x95, 0x20, 0x92, 0xf0, 0xbe, 0x39, 0x31, 0xe1, 0x2a, 0xf7, 0x93, 0xb4, 0xf7, 0xe4, 0xf1, 0x85,
-    0xae, 0x50, 0xf1, 0x63, 0xd4, 0x5d, 0x9c, 0x6c
-};
-unsigned char test_1_expected[0x100] = {
-    0x47, 0x00, 0x00, 0xd0,
-    0xaf, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xef, 0xbe, 0xfb, 0xe6, 0xb5, 0xad, 0x7c,
-    0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, 0xf9, 0xf3, 0xe6, 0xb5, 0xad, 0x6b, 0x5f, 0x3e, 0x7c, 0xf9,
-    0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, 0xad, 0x6b, 0x5a, 0xd7, 0xcf, 0x9f, 0x3e, 0x5b, 0x16, 0xc7,
-    0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, 0xb5, 0xf3, 0xe7, 0xcf, 0x96, 0xc5, 0xb1, 0xf3, 0xe7, 0xcf,
-    0x9a, 0xd6, 0xb5, 0xad, 0x7c, 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c, 0xf9, 0xf3, 0xe6, 0xb5, 0xad,
-    0x6b, 0x5f, 0x3e, 0x7c, 0xf9, 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9, 0xad, 0x6b, 0x5a, 0xd7, 0xcf,
-    0x9f, 0x3e, 0x5b, 0x16, 0xc7, 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6, 0xb5, 0xf3, 0xe7, 0xcf, 0x96,
-    0xc5, 0xb1, 0xf3, 0xe7, 0xcf, 0x9a, 0xd6, 0xb5, 0xad, 0x7c, 0xf9, 0xf3, 0xe5, 0xb1, 0x6c, 0x7c,
-    0xf9, 0xf3, 0xe6, 0xb5, 0xad, 0x6b, 0x5f, 0x3e, 0x7c, 0xf9, 0x6c, 0x5b, 0x1f, 0x3e, 0x7c, 0xf9,
-    0xad, 0x6b, 0x5a, 0xd7, 0xcf, 0x9f, 0x3e, 0x5b, 0x16, 0xc7, 0xcf, 0x9f, 0x3e, 0x6b, 0x5a, 0xd6,
-    0xb5, 0xf3, 0xe7, 0xcf, 0x96, 0xc5, 0xb1, 0xf3, 0xe7, 0xcf, 0x9a, 0xd0, 0x00, 0x00, 0x00, 0x00,
-    0xff, 0xfc, 0x44, 0x00, 0x66, 0xb1, 0x11, 0x11
-};
-unsigned char test_1_expected_stream[0x100] = {
-    0xdc, 0x15, 0xde, 0xf1, 0x4a, 0xf1, 0xf8, 0x2c,
-    0x75, 0xc8, 0x3a, 0x1f, 0xbf, 0x67, 0x19, 0xe1,
-    0xf4, 0x6c, 0x78, 0x99, 0x48, 0xaf, 0xef, 0x94,
-    0x71, 0x6b, 0x23, 0x9e, 0x29, 0x69, 0x2d, 0xa1,
-    0x8a, 0xbb, 0xf4, 0x16, 0x68, 0xa5, 0x7f, 0x14,
-    0xa9, 0x37, 0x24, 0x05, 0x5e, 0xdd, 0xec, 0x4b,
-    0xb5, 0xcb, 0x7f, 0x1d, 0xa7, 0x09, 0x2a, 0xce,
-    0xc4, 0x30, 0x83, 0xfd, 0xd9, 0x88, 0xa9, 0xf3,
-    0x85, 0x9c, 0x38, 0x31, 0x88, 0xac, 0x74, 0x02,
-    0x44, 0xdc, 0xb7, 0x81, 0x07, 0xc8, 0x1b, 0x03,
-    0x9c, 0x76, 0xbe, 0xe9, 0x4d, 0x3e, 0x19, 0xad,
-    0xe1, 0xf1, 0xa5, 0x13, 0xe8, 0xc0, 0x12, 0x57,
-    0x68, 0xb1, 0x9c, 0x6c, 0x9f, 0x58, 0x78, 0xee,
-    0x4f, 0x5b, 0x33, 0x1e, 0xc6, 0x29, 0xfc, 0x40,
-    0x58, 0x22, 0xa2, 0xd8, 0x32, 0xdd, 0x29, 0x4f,
-    0x2b, 0xe1, 0xef, 0xe4, 0xbb, 0xf2, 0x60, 0x94,
-    0x6c, 0xc5, 0x51, 0xec, 0x35, 0x4c, 0x27, 0xc6,
-    0x9d, 0x73, 0xe0, 0xf4, 0x2b, 0xfa, 0x62, 0x12,
-    0xcd, 0x44, 0xbe, 0x57, 0xfe, 0x80, 0xe7, 0xa9,
-    0x3c, 0x49, 0x42, 0xb6, 0xed, 0x05, 0x57, 0x00,
-    0xd2, 0x25, 0x90, 0xb3, 0xe4, 0x65, 0x8f, 0xd6,
-    0x4e, 0x0c, 0x73, 0x30, 0x3b, 0x68, 0x48, 0xdd,
-// stream ^ sb
-//    0x02, 0x48, 0xbd, 0xe9, 0x10, 0x69, 0xef, 0x86,
-//    0xbc, 0x74, 0x1d, 0xd9, 0x74, 0x2e, 0x59, 0xa9,
-//    0x09, 0x4c, 0xcf, 0x9c, 0x13, 0x88, 0x24, 0x7f,
-//    0xeb, 0x9b, 0x8f, 0xdb, 0x44, 0x3f, 0xd9, 0xda,
-};
-unsigned char test_1_expected_block[0x100] = {
-    0xad, 0xf6, 0x46, 0x06, 0xae, 0x92, 0x00, 0x38,
-    0x47, 0x9b, 0xa3, 0x22, 0x92, 0x9b, 0xf4, 0xd5,
-    0xf0, 0xbf, 0x2a, 0x2d, 0x7f, 0xf4, 0xdd, 0x8c,
-    0x0d, 0x2e, 0x22, 0xb0, 0x1b, 0x01, 0xa5, 0x23,
-    0x89, 0x40, 0xbc, 0xdb, 0x8f, 0xab, 0x70, 0xb8,
-    0x27, 0x88, 0xcf, 0x9a, 0x4f, 0xae, 0xe9, 0x1a,
-    0xee, 0xfc, 0x3d, 0x82, 0x92, 0xd8, 0xb5, 0x33,
-    0xcb, 0x5e, 0xfe, 0xff, 0xe8, 0xd7, 0x51, 0x45,
-    0xa0, 0x17, 0x3b, 0x8c, 0x88, 0x7b, 0xd5, 0x0e,
-    0xc1, 0x9c, 0x63, 0x41, 0xf5, 0x5d, 0xaa, 0x8a,
-    0x5f, 0x37, 0x5b, 0xce, 0x7f, 0x76, 0xb4, 0x83,
-    0x74, 0x8f, 0x37, 0x47, 0x75, 0x6d, 0x2c, 0xca,
-    0x5a, 0x40, 0xa5, 0x75, 0x1a, 0x61, 0x81, 0x8d,
-    0xe4, 0x87, 0x17, 0xd0, 0x75, 0xee, 0x9a, 0x6b,
-    0x82, 0x6e, 0x47, 0x92, 0xd3, 0x32, 0x59, 0x5a,
-    0x03, 0x6e, 0x8a, 0x26, 0x7e, 0x0d, 0xf7, 0x7d,
-    0xf4, 0x4e, 0x79, 0x49, 0x59, 0x6f, 0x27, 0x2b,
-    0x80, 0x8f, 0x9e, 0x5b, 0xd6, 0xc0, 0xb0, 0x0b,
-    0xe6, 0x2e, 0xb2, 0xd5, 0x80, 0x10, 0x7f, 0xc1,
-    0xbf, 0xae, 0x1f, 0xd9, 0x6d, 0x57, 0x3c, 0x37,
-    0x4d, 0x21, 0xe4, 0xc8, 0x85, 0x44, 0xcf, 0xa0,
-    0x07, 0x93, 0x18, 0x83, 0xef, 0x35, 0xd4, 0xb1,
-    0xff, 0xfc, 0x44, 0x00, 0x66, 0xb1, 0x11, 0x11
-};
-unsigned char test_1_expected_kb[] = {
-    0xEE, 0x45, 0xE0, 0xC9, 0x02, 0x1B, 0xE0, 0x07,
-    0x46, 0xA4, 0x1C, 0x26, 0x7B, 0x0C, 0x01, 0xED,
-    0x93, 0x99, 0xC3, 0x14, 0xC4, 0x4A, 0x8D, 0x54,
-    0x19, 0x82, 0x39, 0xD1, 0x33, 0xB0, 0x33, 0x52,
-    0x75, 0x62, 0x80, 0x3A, 0xC8, 0x83, 0x5E, 0x23,
-    0xA2, 0x57, 0x0C, 0xC4, 0x2C, 0x2D, 0xD2, 0x98,
-    0xA0, 0x6C, 0x77, 0x29, 0x11, 0x42, 0x49, 0xCE,
-};
-unsigned char test_1_expected_kk[] = {
-    0x5e, 0x9d, 0xff, 0x2e, 0xbb, 0xaa, 0xa8, 0xe9,
-    0xf6, 0x0e, 0xff, 0x7c, 0xda, 0xce, 0x55, 0x03,
-    0xd9, 0xde, 0x79, 0xf5, 0x2c, 0xaf, 0x06, 0xf8,
-    0xb2, 0xc9, 0xf8, 0x78, 0x54, 0xf9, 0xd1, 0xe7,
-    0xeb, 0xbe, 0xd7, 0xeb, 0x25, 0xe9, 0x17, 0x99,
-    0xbf, 0x24, 0xce, 0x2a, 0x73, 0xfe, 0xf9, 0xbc,
-    0xd9, 0x55, 0x91, 0xcf, 0xe0, 0xc9, 0xdf, 0x88,
-};
-
-
-////////// test 2: even key
-unsigned char test_2_key[0x8] = {
-    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
-};
-unsigned char test_2_encrypted[0x100] = {
-    0x47, 0x00, 0x00, 0x90,
-    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
-    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
-    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
-    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
-    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
-    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
-    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
-    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
-    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
-};
-unsigned char test_2_expected[0x100] = {
-    0x47, 0x00, 0x00, 0x90,
-    0x2d, 0x0a, 0x47, 0x20, 0x18, 0x11, 0x9c, 0x8a, 0xd1, 0x2a, 0x65, 0x6b, 0x89, 0xe4, 0x35, 0x2b,
-    0xc2, 0xb5, 0x90, 0x61, 0xd1, 0x7e, 0x02, 0xe1, 0x3f, 0x46, 0x70, 0xcf, 0x77, 0x91, 0x2f, 0x22,
-    0x93, 0xc1, 0x6c, 0xfe, 0x49, 0xad, 0x7c, 0xc2, 0xaf, 0x86, 0x1b, 0xa3, 0x29, 0xbe, 0xaa, 0x64,
-    0xf0, 0x22, 0xb9, 0x5e, 0x98, 0xaa, 0x60, 0xef, 0xdf, 0xd6, 0x44, 0x77, 0xe6, 0xbf, 0xbb, 0x94,
-    0xb2, 0x0a, 0x63, 0x0e, 0x5c, 0xf2, 0xac, 0xb4, 0x49, 0xcc, 0x9e, 0x4f, 0x94, 0x4c, 0x30, 0x12,
-    0xe8, 0x55, 0xc2, 0x44, 0xa4, 0x52, 0xcb, 0x61, 0x81, 0xc9, 0xb6, 0xa6, 0x6b, 0xef, 0xaf, 0xa6,
-    0x71, 0x1d, 0x7b, 0x58, 0x2f, 0xfa, 0xd1, 0x0c, 0x07, 0x9d, 0x1f, 0x35, 0x87, 0xbe, 0x02, 0x9f,
-    0x20, 0xc6, 0x60, 0x8f, 0x1c, 0x30, 0x0f, 0x96, 0xd0, 0x71, 0xd6, 0x51, 0x10, 0xdf, 0x5b, 0xf6,
-    0x44, 0x2f, 0x80, 0x28, 0xb7, 0xec, 0x23, 0x59, 0x4b, 0x94, 0x0b, 0x9a, 0x74, 0xa1, 0x1f, 0xf7,
-    0x9e, 0x76, 0xb4, 0xdf, 0xbb, 0x3c, 0x8c, 0x88, 0x97, 0x22, 0x56, 0x73, 0x16, 0x05, 0xac, 0xf9,
-    0x4f, 0x77, 0x9d, 0x38, 0xa0, 0x6b, 0x05, 0xd2, 0xe6, 0x15, 0x01, 0xb1, 0x5c, 0xc9, 0x62, 0xa9,
-    0x9b, 0x1a, 0x6a, 0x1a, 0xcf, 0xe6, 0xa8, 0xba,
-};
-
-
-////////// test 3: even key
-unsigned char test_3_key[0x8] = {
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
-unsigned char test_3_encrypted[0x100] = {
-    0x47, 0x00, 0x00, 0x90,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-
-};
-unsigned char test_3_expected[0x100] = {
-    0x47, 0x00, 0x00, 0x90,
-    0xfe, 0x91, 0xa7, 0x2f, 0xbf, 0xb0, 0x6a, 0x54, 0xc1, 0xe4, 0x33, 0x27, 0x18, 0xd5, 0x9c, 0x43,
-    0xea, 0xaa, 0x6b, 0x38, 0x5c, 0xe7, 0xae, 0xc9, 0xac, 0xec, 0xef, 0xc3, 0x51, 0x7d, 0x53, 0x47,
-    0xa0, 0xa7, 0x6d, 0x73, 0x8a, 0x9d, 0x16, 0x7d, 0x05, 0x2d, 0xd6, 0x6b, 0xf4, 0x8d, 0x4b, 0x81,
-    0x98, 0x2f, 0x46, 0xa5, 0x34, 0x84, 0xf3, 0x70, 0xa4, 0xe9, 0x04, 0x84, 0x7b, 0x87, 0x79, 0x3c,
-    0x01, 0x25, 0xb5, 0xfc, 0x3d, 0xd0, 0x25, 0xea, 0x2f, 0x91, 0xf0, 0x3f, 0x7f, 0xd4, 0x8e, 0x1e,
-    0x36, 0x83, 0x22, 0x91, 0x57, 0x92, 0x36, 0x0b, 0x44, 0xa5, 0xcc, 0x5e, 0xef, 0x44, 0x3e, 0xf8,
-    0xe9, 0x7b, 0x5e, 0x0c, 0xea, 0xb2, 0x50, 0x39, 0xb7, 0xea, 0xc4, 0xfb, 0xe4, 0x37, 0xf8, 0x85,
-    0xc2, 0xdc, 0x01, 0x98, 0x01, 0x2a, 0x44, 0xd3, 0x75, 0x10, 0x38, 0xf4, 0x85, 0x3e, 0xc9, 0xf7,
-    0xe7, 0xe4, 0xec, 0x40, 0x3d, 0x8f, 0xa5, 0xd2, 0x8a, 0xca, 0x62, 0x03, 0x3f, 0x65, 0x28, 0x8d,
-    0xf5, 0x56, 0xa7, 0xea, 0xd1, 0x0d, 0x70, 0x82, 0xbc, 0x90, 0x59, 0xf8, 0x3e, 0x08, 0xc9, 0xe1,
-    0x97, 0xef, 0x82, 0x43, 0x35, 0x41, 0x3e, 0x7f, 0x00, 0x96, 0x3f, 0x90, 0xe5, 0x1e, 0x96, 0xba,
-    0xce, 0x6d, 0xd2, 0x54, 0xce, 0x84, 0x76, 0x3c
-};
-
-
-////////// odd key, only 80 (0x50) bytes of payload (10 groups of 8 bytes + 0 byte residue)
-unsigned char test_p_10_0_key[0x8] = {
-    0x2d, 0x11, 0x5f, 0x9d, 0x29, 0xbf, 0x7f, 0x67
-};
-unsigned char test_p_10_0_encrypted[0x100] = {
-  0x47, 0x00, 0x7a, 0xbe,
-  0x67, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x71, 0xa5, 0x7b, 0x8f, 0xf9, 0x87, 0xcb, 0xac,
-  0xea, 0x08, 0x0c, 0x02, 0x87, 0x7b, 0xad, 0x10, 0x40, 0x28, 0x8e, 0xd4, 0x4e, 0x62, 0xc7, 0x74,
-  0xd6, 0xbb, 0x3a, 0xaa, 0xb0, 0x7b, 0x70, 0xbe, 0x06, 0xc9, 0xdc, 0x07, 0xd2, 0x2d, 0xab, 0x2d,
-  0xe2, 0xc6, 0x36, 0xa6, 0xda, 0x64, 0x61, 0x15, 0xd1, 0x6a, 0x40, 0xc0, 0xa9, 0xfb, 0x3f, 0xb2,
-  0x6d, 0xa5, 0x59, 0xae, 0x57, 0x88, 0x6b, 0x0e, 0x00, 0xae, 0xce, 0x64, 0xee, 0xfd, 0xb1, 0x7f,
-  0x78, 0x9c, 0x12, 0x42, 0xbe, 0x30, 0x8a, 0xa3 
-};
-unsigned char test_p_10_0_expected[0x100] = {
-  0x47, 0x00, 0x7a, 0xbe,
-  0x67, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, 0xca, 0x32, 0xaf, 0x2e, 0x6a, 0xea, 0x05,
-  0x39, 0x33, 0x67, 0x5d, 0xa3, 0x61, 0x0f, 0x34, 0x40, 0x6c, 0x1a, 0xb3, 0xee, 0x54, 0x64, 0xd5,
-  0xa3, 0x01, 0x95, 0x87, 0x9d, 0x3d, 0x38, 0xc5, 0x82, 0x8b, 0x8d, 0xab, 0xad, 0x93, 0x0f, 0xe8,
-  0xf9, 0xbd, 0x52, 0x98, 0x59, 0xb2, 0x41, 0x95, 0xcd, 0xae, 0x9b, 0x3e, 0xdf, 0xdb, 0x14, 0x9b,
-  0xa9, 0x22, 0x0d, 0x2d, 0x61, 0xf5, 0xf2, 0x52, 0x83, 0x20, 0xae, 0xb8, 0x83, 0x52, 0x02, 0xee,
-  0xbd, 0xd2, 0x94, 0x6c, 0x27, 0x58, 0x55, 0xd0
-};
-
-
-////////// odd key, only 14 (0x0e) bytes of payload (1 group of 8 bytes + 6 byte residue)
-unsigned char test_p_1_6_key[0x8] = {
-    0x2d, 0x11, 0x5f, 0x9d, 0x29, 0xbf, 0x7f, 0x67
-};
-unsigned char test_p_1_6_encrypted[0x100] = {
-  0x47, 0x00, 0x7a, 0xb7,
-  0xa9, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0x5e, 0xfb, 0xc8, 0x4a, 0x63,
-  0xe3, 0x3c, 0x11, 0xd9, 0xe0, 0x75, 0x8e, 0xf2 
-};
-unsigned char test_p_1_6_expected[0x100] = {
-  0x47, 0x00, 0x7a, 0xb7,
-  0xa9, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5a, 0x2c, 0xee, 0xb3, 0xde, 0x92,
-  0xe7, 0xa6, 0x6c, 0xaa, 0x99, 0x84, 0xe4, 0x00 
-};
diff --git a/contrib/sasc-ng/FFdecsa/Makefile b/contrib/sasc-ng/FFdecsa/Makefile
deleted file mode 100644
index 0108645..0000000
--- a/contrib/sasc-ng/FFdecsa/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-##### compiling with g++ gives a little more speed
-#COMPILER=gcc
-#COMPILER=g++
-PARALLEL_MODE ?= PARALLEL_32_INT
-
-ARCH = $(shell uname -m)
-ifeq ($(ARCH), x86_64)
-  FLAGS=-Wall -O3 -march=x86-64 -mmmx -fexpensive-optimizations -funroll-loops -finline-limit=6000000 --param max-unrolled-insns=500
-endif
-ifeq ($(ARCH), athlon-xp)
-  FLAGS=-Wall -O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops -finline-limit=6000000 --param max-unrolled-insns=500
-endif
-#ifeq ($(ARCH), i686)
-#  FLAGS=-Wall -O3 -march=pentium4 -mmmx -msse2 -fexpensive-optimizations -funroll-loops
-#endif
-
-###there are two functions which apparently don't want to be inlined
-#FLAGS=-O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops -finline-limit=6000000 --param max-unrolled-insns=500
-#FLAGS=-O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops --param max-unrolled-insns=500
-#FLAGS=-O3 -march=pentium3 -fexpensive-optimizations -funroll-loops
-
-###icc crashes for unknown reasons
-#COMPILER=/opt/intel_cc_80/bin/icc
-#FLAGS=-O3 -march=pentiumiii
-
-#FLAGS += -g
-#FLAGS += -fno-alias
-#FLAGS += -vec_report3
-#FLAGS += -Wall -Winline
-#FLAGS += -fomit-frame-pointer 
-#FLAGS += -pg
-
-COMPILER ?= g++
-FLAGS    ?= -Wall -O3 -march=pentium -mmmx -fomit-frame-pointer -fexpensive-optimizations -funroll-loops
-
-H_FILES = FFdecsa.h parallel_generic.h parallel_std_def.h \
-          parallel_032_4char.h \
-          parallel_032_int.h \
-          parallel_064_2int.h \
-          parallel_064_8charA.h \
-          parallel_064_8char.h \
-          parallel_064_long.h \
-          parallel_064_mmx.h \
-          parallel_128_16charA.h \
-          parallel_128_16char.h \
-          parallel_128_2long.h \
-          parallel_128_2mmx.h \
-          parallel_128_4int.h \
-          parallel_128_sse.h
-
-all: FFdecsa.o
-
-%.o: %.c
-	$(COMPILER) $(FLAGS) -DPARALLEL_MODE=$(PARALLEL_MODE) -c $<
-
-FFdecsa_test:	FFdecsa_test.o FFdecsa.o
-	$(COMPILER) $(FLAGS) -o FFdecsa_test FFdecsa_test.o FFdecsa.o
-
-FFdecsa_test.o: FFdecsa_test.c FFdecsa.h FFdecsa_test_testcases.h
-FFdecsa.o: 	FFdecsa.c stream.c $(H_FILES)
-
-clean:
-	rm -f FFdecsa_test *.o
-
-test:	FFdecsa_test
-	sync;usleep 200000;nice --19 ./FFdecsa_test
diff --git a/contrib/sasc-ng/FFdecsa/README b/contrib/sasc-ng/FFdecsa/README
deleted file mode 100644
index 503c744..0000000
--- a/contrib/sasc-ng/FFdecsa/README
+++ /dev/null
@@ -1,50 +0,0 @@
--------
-FFdecsa
--------
-version 1.0
-Copyright 2003-2004  fatih89r
-released under GPL
-
-
-FFdecsa is a fast implementation of a CSA decryption algorithm for MPEG
-TS packets. It is shockingly fast, more than 800% the speed of the
-fastest implementation I can find around. (read the docs to know what FF
-stands for)
-
-On an AthlonXP 2400 (2000MHz) it achieves 165Mbit/s; the previous record
-was around 20Mbit/s.
-
-This means that:
-- decrypting a 8Mbit/s stream takes 5% of CPU instead of 40%
-- decrypting a full transponder (with all its channels or with a big
-  HDTV stream) carrying 38Mbit/s takes 23% of CPU instead of 190%
-  (>100%, so undecryptable in real time)
-- a very slow processor can decrypt one channel with no problems
-- offline decoding of one hour of a 5Mbit/s channel takes less than
-  two minutes (30x than realtime)
-- offline decoding will work at more than 20MB/s (megabytes/s),
-  nearly as fast as a file copy
-
-The docs directory contains useful stuff:
-
-  FAQ.txt
-    to know something more about this software
-
-  how_to_compile.txt
-    if you want to compile this code (and get optimal speed)
-
-  how_to_use.txt
-    if you want to use this code
-
-  technical_background.txt
-    if you want to understand how this code works or you want to
-    modify/improve it
-
-  how_to_understand.txt
-    if you want to understand the code to make modifications
-
-  how_to_release.txt
-    if you want to release modified versions of the code
-
-
-fatih89r
diff --git a/contrib/sasc-ng/FFdecsa/docs/FAQ.txt b/contrib/sasc-ng/FFdecsa/docs/FAQ.txt
deleted file mode 100644
index 2d46f06..0000000
--- a/contrib/sasc-ng/FFdecsa/docs/FAQ.txt
+++ /dev/null
@@ -1,77 +0,0 @@
--------
-FFdecsa
--------
-
-FFdecsa is a fast implementation of the CSA decryption algorithm for MPEG
-TS packets.
-
-Q: What does FF stands for?
-A: FFdecsa means "Fucking Fast decsa".
-
-Q: Why would you use such a rude name?
-A: Because this code is fucking fast, more than 800% the speed of the best
-   implementation I'm able to find around at the moment.
-
-Q: How it that possible? Are all other programmers stupid?
-A: No, they just tried to save a cycle or two tweaking a fundamentally wrong
-   implementation. The algorithm has to be implemented in a totally different
-   way to achieve good speed.
-
-Q: Do you use multimedia instructions?
-A: I use every trick I could come up with, including multimedia instructions.
-   They are not fundamental in achieving speed, a version without them runs
-   at 6x the speed of the best implementation around (which uses MMX).
-
-Q: So how did you do that?
-A: By using a different approach for the implementation. This code is not
-   exploiting some new CSA vulnerability, it is just doing the same
-   calculations better. Think about replacing bubble sort with quick sort.
-
-Q: You're joking, it's impossible to gain so much speed.
-A: Speed test are available, technical documentation is available, source
-   code is available. Try it yourself.
-   If you want details, these are some of the documented tricks I used
-   (more details in the docs directory):
-    TRICK NUMBER 0: emulate the hardware
-    TRICK NUMBER 1: virtual shift registers
-    TRICK NUMBER 2: parallel bitslice
-    TRICK NUMBER 3: multimedia instructions
-    TRICK NUMBER 4: parallel byteslice
-    TRICK NUMBER 5: efficient bit permutation
-    TRICK NUMBER 6: efficient normal<->slice conversion
-    TRICK NUMBER 7: try hard to process packets together
-    TRICK NUMBER 8: try to avoid doing the same thing many times
-    TRICK NUMBER 9: compiler
-    TRICK NUMBER a: a lot of brain work
-
-Q: How can be this code useful?
-A: You can use this code in place of the old slow implementations and save a
-   lot of CPU power.
-
-Q: Just that?
-A: Well, new applications are possible.
-   Decrypting a whole transponder is easily doable now. Well, a $50 CPU can
-   decrypt four transponder at the same time if you have four DVB boards (but
-   I couldn't test that).
-
-Q: You're cheating, this code is fake, I don't believe one word.
-A: Go away. This is technical stuff for people with brains.
-
-Q: This code is great, may I distribute your code in original or modified
-   form?
-A: Only if you respect the license.
-
-Q: May I use your code in my player/library/plugin...?
-A: Again, you have to respect the license.
-
-Q: Are you an extraterrestrial programmer?
-A: No, just a Turkish guy with a PC to play with :-)
-
-Q: Why did you spend your time doing this?
-A: Because I thought that my approach was doable and I was sure it would
-   have been much faster, so I had to implement it to confirm I was right.
-   I got 8x the speed and that's enough to be proud of it. And I could not
-   just keep the code for myself only.
-
-Q: What is the answer to the meaning of the universe?
-A: 42,43,71,5f,65,85,f6,76,0d,13,28,96,...
diff --git a/contrib/sasc-ng/FFdecsa/docs/how_to_compile.txt b/contrib/sasc-ng/FFdecsa/docs/how_to_compile.txt
deleted file mode 100644
index 4f8c141..0000000
--- a/contrib/sasc-ng/FFdecsa/docs/how_to_compile.txt
+++ /dev/null
@@ -1,114 +0,0 @@
--------
-FFdecsa
--------
-
-Compiling is as easy as running a make command, if you have gcc and are
-using a little endian machine. 64 bit machines have not been tested but
-may work with little or no changes; big endian machines will certainly
-give incorrect results (read the technical_background.txt to know where
-the problem is).
-
-Before compiling you could edit the Makefile to tweak compiler flags for
-optimal performance. If you want to play with different bit-grouping
-strategies you have to edit FFdecsa_DBG.c and change the "our choice"
-definition. This is highly critical for performance.
-
-After compilation run the FFdecsa_test application. It will test correct
-decryption and print the meausered speed (use "nice --19 ./FFdecsa_test"
-on an idle machine for better results). Or just use "make test".
-
-gcc >=3.3.3 is highly recommended. Older versions could give performance
-problems.
-
-icc is currently unusable. In the initial phases of development of
-FFdecsa icc was able to compile the code and gave interesting speed
-results when using the 8charA grouping mode (array of 8 characters are
-automatically manipulated through MMX instructions). At some point the
-code began to work incorrectly because of a compiler bug (but I found a
-workaround). Then, the performance dropped with no reason; I found a
-workaround by adding an unused variable (alignment problem, grep for icc
-in the code to see where it happens). Then, with the introduction of
-group modes based on intrinsics, gcc was finally able to go beyond the
-speed record originally set by icc. Additional code tweaks added more
-speed to gcc, while icc started to segfault on compilation (both version
-7 and 8). In conclusion, icc is bugged and this code is too hard for it.
-gcc on the other hand is great. I tried to inspect generated assembler
-to find weak spots, and the generated code is very good indeed.
-
-Note: the code can be compiled with gcc or g++. g++ is 3% faster for
-some reason.
-
-You should not get any errors or warnings. I only get two "inlining
-failed" warnings on two functions I asked to be inlined but gcc doesn't
-want to inline.
-
-The build process creates additional temp files by running grep
-commands. This is how debugging output is handled. All the lines
-containing DBG are removed and the temp file is compiled (so the line
-numbers change between temp and original files). Don't edit the temp
-files, they will be overwritten. If you don't remove the DBG lines (for
-example, by changing "grep -v DBG" into "grep -v aaDBG" in Makefile) a
-lot of output will be generated. This is useful to understand what's
-wrong when the FFdecsa_test is failing. I included a reference "known
-good" output in the debug_output directory. Extra debug output is
-commented out in the code.
-
-The debug output functionality could be... bugged. This is because I
-tested everything using hard coded int grouping mode and then
-generalized the debug output to abstract grouping modes. A bug where 4
-bytes are printed instead of 8 could be present somewhere. I think it
-isn't, but you've been warned.
-
-This code was only tried on Linux.
-It should work on Windows or other platforms, but you may encounter
-problems related to the compiler quality. If you want to try, begin with
-the int grouping mode. It is only 30% slower then the best (MMX) and it
-should be easily portable because no intrinsics are used. I'm
-particularly interested in hearing what kind of performance can be
-obtained on x86_64 processors in int, long long int, mmx, 2mmx, sse
-modes.
-
-
-As a reference, here are the results I get on an Athlon XP 2400+ (this
-processor runs at 2000MHz); other processors belonging to the Athlon XP
-architecture, including Durons, should have the same speed per MHz.
-Cache size and bus speed don't matter.
-
-CPU: AMD Athlon XP 2400+
-
-Compiler: g++ (gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7))
-
-Flags: -O3 -march=athlon-xp -fexpensive-optimizations -funroll-loops
-       --param max-unrolled-insns=500
-
-grouping mode           speed (Mbit/s)    notes
----------------------------------------------------------------------
-PARALLEL_32_4CHAR            14
-PARALLEL_32_4CHARA           12
-PARALLEL_32_INT             125           very good and very portable
-PARALLEL_64_8CHAR            17
-PARALLEL_64_8CHARA           15           needs a vectorizing compiler
-PARALLEL_64_2INT             75           x86 has too few registers
-PARALLEL_64_LONG             97           try this on x86_64
-PARALLEL_64_MMX             165           the best
-PARALLEL_128_16CHAR           6
-PARALLEL_128_16CHARA          7
-PARALLEL_128_4INT            69
-PARALLEL_128_2LONG           52
-PARALLEL_128_2MMX            36           slower than expected
-PARALLEL_128_SSE            156           just slower than 64_MMX
-
-Best speeds are obtained with native data types: int, mmx, sse (this
-could be a compiler artifact).
-
-64 bit processors should try 64_LONG.
-
-Vectorizing compilers should like *CHARA.
-
-64_MMX is faster than 128_SSE on the Athlon; perhaps SSE instruction are
-internally split into 64 bit chunks. Could be different on x86_64 or
-Intel processors.
-
-128_SSE has a 64 bit (MMX) batch type because SSE has no shifting
-instructions, they are only available on SSE2. As the Athlon XP doesn't
-support SSE2, I couldn't experiment with that.
diff --git a/contrib/sasc-ng/FFdecsa/docs/how_to_release.txt b/contrib/sasc-ng/FFdecsa/docs/how_to_release.txt
deleted file mode 100644
index 923a61b..0000000
--- a/contrib/sasc-ng/FFdecsa/docs/how_to_release.txt
+++ /dev/null
@@ -1,21 +0,0 @@
--------
-FFdecsa
--------
-
-Please use the name of the release you're basing on as a base name and
-add your suffix.
-
-For example if john modifies
-  FFdecsa-1.0.0
-he should release
-  FFdecsa-1.0.0-john_0.3
-or
-  FFdecsa-1.0.0-john_0.4
-
-If paul modifies john's version the correct name would be like
-  FFdecsa-1.0.0-john_0.4-paul_0.1
-
-This is to avoid many different versions with random version numbers, as
-development is not centralized.
-
-Thank you.
diff --git a/contrib/sasc-ng/FFdecsa/docs/how_to_understand.txt b/contrib/sasc-ng/FFdecsa/docs/how_to_understand.txt
deleted file mode 100644
index 4b3f2f1..0000000
--- a/contrib/sasc-ng/FFdecsa/docs/how_to_understand.txt
+++ /dev/null
@@ -1,15 +0,0 @@
--------
-FFdecsa
--------
-
-First, you need to know how decsa works; study the source of a classical
-implementation. Then you have to understand how things are done in
-slicing mode. Read all the documentation and have a working classical
-implementation to compare partial results. There are comments spread
-around the code. Some things are difficult to understand without paper
-notes; for example the matrix transpositions and meaning of array
-indices.
-
-Sorry, it is hard to understand and modify ...
-
-... but it was harder to design and implement!!!
diff --git a/contrib/sasc-ng/FFdecsa/docs/how_to_use.txt b/contrib/sasc-ng/FFdecsa/docs/how_to_use.txt
deleted file mode 100644
index 46fedd3..0000000
--- a/contrib/sasc-ng/FFdecsa/docs/how_to_use.txt
+++ /dev/null
@@ -1,239 +0,0 @@
--------
-FFdecsa
--------
-
-This code is able to decrypt MPEG TS packets with the CSA algorithm. To
-achieve high speed, the decryption core works on many packets at the
-same time, so the interface is more complicated than usual decsa
-implementations.
-
-The FFdecsa.h file defines the external interface of this code.
-
-Basically:
-
-1) you use get_suggested_cluster_size to know the optimal number of
-packets you have to pass for decryption
-
-2) you use set_control_words to set the decryption keys
-
-3) you use decrypt_packets to do the actual decryption
-
-You don't need to always use set_control_words before decrypt_packets,
-if keys aren't changed. 
-
-
-The decrypt_packets function call decrypts many packets at the same
-time. The interface is complicated because the only design goal was
-speed, so it implements zero-copying of packets, out-of-order decryption
-and optimal packet aggregation for better parallelism. This part is the
-most difficult to understand.
-
---- HOW TO USE int decrypt_packets(unsigned char **cluster); ---
-
-PARAMETERS
-  cluster points to an array of pointers, representing zero or more
-  ranges. Every range has a start and end pointer; a start pointer==NULL
-  terminates the array.
-  So, an array of pointers has this content:
-    start_of_buffer_1, end_of_buffer_1, ... start_of_buffer_N,
-    end_of_buffer_N, NULL
-  example:
-    0x12340000, 0x123400bc, 0x56780a00, 0x56780b78, NULL
-  has two ranges (0x12340000 - 0x123400bc and 0x56780a00 - 0x56780b78),
-  for a total of three packets (starting at 0x12340000, 0x56780a00,
-  0x56780abc)
-RETURNS
-  How many packets can now be consumed by the caller, this is always >=
-  1, unless the cluster contained zero packets (in that case it's
-  obviously zero).
-MODIFIES
-  The cluster is modified to try to exclude packets which shouldn't be
-  submitted again for decryption (because just decrypted or originally
-  not crypted). "Try to exclude" because the returned array will never
-  be bigger than what was passed, so if you passed only a range and some
-  packets in the middle were decrypted making "holes" into the range,
-  the range would have to be split into several ranges, and that will
-  not be done. If you want a strict description of what has to be passed
-  again to decrypt_packets, you have to use ranges with only one packet
-  inside. Note that the first packet will certainly be eliminated from
-  the returned cluster (see also RETURNS).
-
-You can now read the detailed description of operation or just skip to
-the API examples.
-
-
----------------------------------
-DETAILED DESCRIPTION OF OPERATION
----------------------------------
-  consider a sequence of packets like this:
-   0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
-   E  E  E  E  E  E  E  E  E  E  E  O  E  O  E  O  O  O  O  O  O  O  O  O  O  c  O  O  O  O  O  O  O  O  O  O  O ...
-  where
-   E = encrypted_even,
-   O = encrypted_odd,
-   e = clear_was_encrypted_even,
-   o = clear_was_encrypted_odd,
-   c = clear
-  and suppose the suggested cluster size is 10 (this could be for a function with internal parallelism 8)
-
-  1) we define the cluster to include packets 0-9 and
-  call decrypt_packets
-  a possible result is that the function call
-  - returns 8 (8 packets available)
-  - the buffer contains now this
-  -----------------------------
-   0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
-   e  e  e  e  e  e  e  e  E  E  E  O  E  O  E  O  O  O  O  O  O  O  O  O  O  c  O  O  O  O  O  O  O  O  O  O  O ...
-                          -----
-  - the modified cluster covers 8-9 [continue reading, but then see note 1 below]
-  so, we can use the first 8 packets of the original cluster (0-7)
-
-  2) now, we define cluster over 8-17 and call decrypt_packets
-  a possible result is:
-  - returns 3 (3 packets available)
-  - the buffer contains now this (!!!)
-                          -----------------------------
-   0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
-   e  e  e  e  e  e  e  e  e  e  e  O  e  O  e  O  O  O  O  O  O  O  O  O  O  c  O  O  O  O  O  O  O  O  O  O  O ...
-                                   --    --    --------
-  - the modified cluster covers 11-11,13-13,15-17 [continue reading, but then see note 1 below]
-  so, we can use the first 3 packets of the original cluster (8-10)
-
-  3) now, we define cluster over 11-20 and call decrypt packets (defining a cluster 11-11,13-13,15-22 would be better)
-  a possible result is:
-  - returns 10 (10 packets available)
-  - the buffer contains now this
-                                   -----------------------------
-   0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
-   e  e  e  e  e  e  e  e  e  e  e  o  e  o  e  o  o  o  o  o  o  O  O  O  O  c  O  O  O  O  O  O  O  O  O  O  O ...
-
-  - the modified cluster is empty
-  so, we can use the first 10 packets of the original cluster (11-20)
-  What happened is that the second call decrypted packets 12 and 14 but they were
-  not made available because packet 11 was still encrypted,
-  the third call decrypted 11,13,15-20 and included 12 and 14 as available too.
-
-  4) now, we define cluster over 21-30 and call decrypt packets
-  a possible result is:
-  - returns 9 (9 packets available)
-  - the buffer contains now this
-                                                                 -----------------------------
-   0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 ...
-   e  e  e  e  e  e  e  e  e  e  e  o  e  o  e  o  o  o  o  o  o  o  o  o  o  c  o  o  o  o  O  O  O  O  O  O  O ...
-                                                                                            --
-  - the modified cluster covers 30-30
-  so, we can use the first 9 packets of the original cluster (21-29)
-  What happened is that packet 25 could be skipped because it is in clear.
-
-  Note that the suggested cluster size (10) is higher than the maximum number
-  of packets that can be really decrypted (8), but we are able to skip 12 and 14
-  in step 3) and run the decryption on a full 8 packets group.
-  In the same way, we were able to skip 25 in step 4).
-  There are three kinds of "free" packets we can skip:
-  - packets decrypted in a previous call (as 12 and 14)
-  - packets already in clear (as 25)
-  - packets with a payload of less than 8 bytes (clear==encrypted!)
-
-  Note also that we could have defined a better cluster in step 3
-  (11-11,13-13,15-22), using what step 2 had returned. The risk of not
-  having 8 packets to decrypt would have been smaller (consider the case
-  where 19 and 20 were "c").
-
-  Final considerations:
-  - you can use a bigger or smaller cluster than the suggested number of packets
-  - every call to decrypt_packets has a *fixed* CPU cost, so you should try to
-    not run it with a few packets, when possible
-  - decrypt_packets can't decrypt even and odd at the same time; it guarantees
-    that the first packet will be decrypted and tries to decrypt as many packets
-    as possible
-  - clear packets in the middle of encrypted packets don't happen in real world,
-    but E,E,E,O,E,O,O,O sequences do happen (audio/video muxing problems?) and
-    small packets (<8 bytes) happen frequently; the ability to skip is useful.
-
-  note 1:
-    As the returned cluster will not have more ranges than the passed one, what is
-    described above is not entirely accurate.
-    In the step 1) the returned cluster will cover 8-9, but in step 2) it will
-    cover 11-17 (some extra packets had to remain in); this lack of information
-    prevents us from using an optimal 11-11,13-13,15-22 in step 3). Note that
-    in any case step 3) will decrypt 11,13,15,16,17,18,19,20 thanks to the
-    extra margin we use (we put ten packets (including 19 and 20) even if the
-    parallelism was just 8, and it was a good idea; but if 19 and 20 were of
-    type c, we would have run the decryption with only 6/8 efficiency).
-    This problem can be prevented by using ranges with only one packet: in
-    step 2) we would have passed
-    8-8,9-9,10-10,11-11,12-12,13-13,14-14,15-15,16-16,17-17
-    and got back
-    11-11,13-13,15-17.
-
-
-------------
-API EXAMPLES
-------------
-
-Some examples of how the API can be used (this is not real code, so it
-may have typos or other bugs).
-
-
-Example 1: (big linear buffer, simple use of cluster)
-
-  unsigned char *p;
-  unsigned char *cluster[3];
-  for(p=start;p<end;){
-    cluster[0]=p;cluster[1]=end;
-    cluster[2]=NULL;
-    p+=188*decrypt_packets(cluster);
-  }
-  //consume(start,end);
-
-
-Example 2: (circular buffer, simple use of cluster)
-
-  unsigned char *p;
-  unsigned char *cluster[5];
-
-  while(1){
-    if(read==write){
-      //buffer is empty
-      //write=refill_buffer(write,start,end);
-      continue;
-    }
-    else if(read<write){
-      cluster[0]=read;cluster[1]=write;
-      cluster[2]=NULL;
-    }
-    else{
-      cluster[0]=read;cluster[1]=end;
-      cluster[2]=start;cluster[3]=write;
-      cluster[4]=NULL;
-    }
-    new_read=read+188*decrypt_packets(cluster);
-    if(new_read<=end){
-      //consume(read,new_read);
-    }
-    else{
-      new_read=start+(new_read-end);
-      //consume(read,end);
-      //consume(start,new_read);
-    }
-    read=new_read;
-    if(read==end) read=start;
-  }
-
-
-Example 3: (undefined buffer structure, advanced use of cluster)
-
-  unsigned char *packets[1000000];
-  unsigned char *cluster[142]; //if suggested packets is 70
-  
-  cluster[0]=NULL;
-  for(n=0;n<1000000;){
-    i=0;
-    while(cluster[2*i]!=NULL) i++; //preserve returned ranges
-    for(k=i;k<70&&n<1000000;k++,n++){
-      cluster[2*k]=packets[n];cluster[2*k+1]=packets[n]+188;
-    }
-    cluster[2*k]=NULL;
-    decrypt_packets(cluster);
-  }
-  //consume_all_packets();
diff --git a/contrib/sasc-ng/FFdecsa/docs/technical_background.txt b/contrib/sasc-ng/FFdecsa/docs/technical_background.txt
deleted file mode 100644
index 613e208..0000000
--- a/contrib/sasc-ng/FFdecsa/docs/technical_background.txt
+++ /dev/null
@@ -1,341 +0,0 @@
--------
-FFdecsa
--------
-
-This doc is for people who looked into the source code and found it
-difficult to believe that this is a decsa algorithm, as it appears
-completely different from other decsa implementations.
-
-It appears different because it is different. Being different is what
-enables it to be a lot faster than all the others (currently it has more
-than 800% the speed of the best version I was able to find)
-
-The csa algo was designed to be run in hardware, but people are now
-running it in software.
-
-Hardware has data lines carrying bits and functional blocks doing
-calculations (logic operations, adders, shifters, table lookup, ...),
-software instead uses memory to contain data values and executes a
-sequence of instructions to transform the values. As a consequence,
-writing a software implementation of a hardware algorithm can be
-inefficient.
-
-For example, if you have 32 data lines, you can permute the bits with
-zero cost in hardware (you just permute the physical traces), but if you
-have the bits in a 32 bit variable you have to use 32 "and" operations
-with 32 different masks, 32 shifts and 31 "or" operations (if you
-suggest using "if"s testing the bits one by one you know nothing about
-how jump prediction works in modern processors).
-
-So the approach is *emulating the hardware*.
-
-Then there are some additional cool tricks.
-
-TRICK NUMBER 0: emulate the hardware
-------------------------------------
-We will work on bits one by one, that is a 4 bit word is now four
-variables. In this way we revert complex software operations into
-hardware emulation:
-
-  software                      hardware
-  -------------------------------------------
-  copy values                   copy values
-  logic op                      logic op
-  (bit permut.) ands+shifts+ors copy values
-  additions                     logic op emulating adders
-  (comparisons) if              logic op selecting one of the two results
-  lookup tables                 logic op synthesizing a ROM (*)
-
-(*) sometimes lookup tables can be converted to logic expressions
-
-The sboxes in the stream cypher have been converted to efficient logic
-operations using custom-written software (look into the logic directory);
-this is responsible for much of the speed increase. Maybe there exists a
-slightly better way to express the sboxes as logical expressions, but it
-would be a minuscule improvement. The sbox in the block cypher can't be
-converted to efficient logic operations (8 bits of input are just too
-much) and is implemented with a traditional lookup in an array.
-
-But there is a problem: we want to process bits, but our external
-input and output want bytes. We need conversion routines. Conversion
-routines are similar to the awful permutations we described before, so
-this has to be done efficiently somehow.
-
-
-TRICK NUMBER 1: virtual shift registers
----------------------------------------
-Shift registers are normally implemented by moving all data around.
-Better leave the data in the same memory locations and redefine where
-the start of the register is (updating a pointer). That is called
-virtual shift register.
-
-
-TRICK NUMBER 2: parallel bitslice
----------------------------------
-Implementing the algorithm as described in tricks 0 and 1 gives us about
-15% of the speed of a traditional implementation. This happens because
-we work on only one bit, even if our CPU is 32 bit wide. But *we can
-process 32 different packets at the same time*. This is called the
-"bitslice" method. It can be done only if the program flow is not
-dependent on the data (if, while,...). Luckily this is true.
-Things like
-  if(a){
-    b=c&d;
-  }
-  else{
-    b=e&f;
-  }
-can be coded as (think of how hardware would implement this)
-  b1=c&d;
-  b2=e&f;
-  b=b2^(a&(b1^b2));
-and things like
-  if(a){
-    b=c&d
-  }
-can be transformed in the same way, as they may be written as
-  if(a){
-    b=c&d
-  }
-  else{
-    b=b;
-  }
-It could look wasteful, but it is not; and destroys data dependency.
-
-Our code takes the same time as before, but produces 32 results, so
-speed is now 480% the speed of a traditional implementation.
-
-
-TRICK NUMBER 3: multimedia instructions
----------------------------------------
-If our CPU is 32 bit but it can also process larger blocks of data
-efficiently (multimedia instructions), we can use them. We only need
-logic ops and these are typically available.
-
-We can use MMX and work on 64 packets, or SSE and work on 128 packets.
-The speed doesn't automatically double going from 32 to 64 because the
-integer registers of the processor are normally faster. However, some
-speed is gained in this way.
-
-Multimedia instructions are often used by writing assembler by hand, but
-compilers are very good in doing register allocation, loop unrolling and
-instruction scheduling, so it is better to write the code in C and use
-native multimedia data types (intrinsics).
-
-Depending on number of available registers, execution latency, number of
-execution units in the CPU, it may be good to process more than one data
-block at the same time, for example 2 64bit MMX values. In this case we
-work on 128 bits by simulating a 128 bit op with two consecutive 64 bit
-op. This may or may not help (apparently not because x86 architecture
-has a small number of registers).
-
-We can also try working on 96 bit, pairing a MMX and an int op, or 192
-bit by using MMX and SSE. While this is doable in theory and could
-exploit different execution units in the CPU, speed doesn't improve
-(because of cache line handling problems inside the CPU, maybe).
-
-Besides int, MMX, SSE, we can use long long int (64 bit) and, why not,
-unsigned char.
-
-Using groups of unsigned chars (8 or 16) could give the compiler an
-opportunity to insert multimedia instructions automatically. For
-example, icc can use one MMX instruction to do
-  unsigned char a[8],b[8],c[8];
-  for(i=0;i<8;i++){
-    a[i]=b[i]&c[i];
-  }
-Some compilers (like icc) are efficient in this case, but using
-intrinsics manually is generally faster.
-
-All these experiments can be easily done if the code is written in a way
-which abstracts the data type used. This is not easy but doable, all the
-operations on data become (inlined) function calls or preprocessor
-macros. Good compilers are able to simplify all the abstraction at
-compile time and generate perfect code (gcc is great).
-
-The data abstraction used in the code is called "group".
-
-
-TRICK NUMBER 4: parallel byteslice
-----------------------------------
-The bitslice method works wonderfully on the stream cypher, but can't be
-applied to the block cypher because of the evil big look up table.
-
-As we have to convert input data from normal to bitslice before starting
-processing and from bitslice to normal before output, we convert the
-stream cypher output to normal before the block calculations and do the
-block stage in a traditional way.
-
-There are some xors in the block cypher; so we arrange bytes from
-different packets side by side and use multimedia instructions to work
-on many bytes at the same time. This is not exactly bitslice, maybe it
-is called byteslice. The conversion routines are similar (just a bit
-simpler).
-
-The data type we use to do this in the code is called "batch".
-
-The virtual shift register described in trick number 1 is useful too.
-
-The look up table is the only thing which is done serially one byte at a
-time. Luckily if we do it on 32 or 64 bytes the loop is heavily
-unrolled, and the compiler and the CPU manage to get a good speed
-because there is little dependency between instructions.
-
-
-TRICK NUMBER 5: efficient bit permutation
------------------------------------------
-The block cypher has a bit permutation part. As we are not in a bit
-sliced form at that point, permuting bits in a byte takes 8 masks, 8
-and, 7 or; but three bits move in the same direction, so we make it with
-6 masks, 6 and, 5 or. Batch processing through multimedia instructions
-is applicable too.
-
-
-TRICK NUMBER 6: efficient normal<->slice conversion
----------------------------------------------------
-The bitslice<->normal conversion routines are a sort of transposition
-operation, that is you have bits in rows and want them in columns. This
-can be done efficiently. For example, transposition of 8 bytes (matrix
-of 8x8=64 bits) can be done this way (we want to exchange bit[i][j] with
-bit[j][i] and we assume bit 0 is the MSB in the byte):
-
-  // untested code, may be bugged
-  unsigned char a[8];
-  unsigned char b[8];
-  for(i=0;i<8;i++) b[i]=0;
-  for(i=0;i<8;i++){
-    for(j=0;j<8;j++){
-      b[i]|=((a[j]>>(7-i)&1))<<(7-j);
-    }
-  }
-
-but it is slow (128 shifts, 64 and, 64 or), or
-
-  // untested code, may be bugged
-  unsigned char a[8];
-  unsigned char b[8];
-  for(i=0;i<8;i++) b[i]=0;
-  for(i=0;i<8;i++){
-    for(j=0;j<8;j++){
-      if(a[j]&(1<<(7-i))) b[i]|=1<<(7-j);
-    }
-  }
-
-but is very very slow (128 shifts, 64 and, 64 or, 128 unpredictable
-if!), or using a>>=1 and b<<=1, which gains you nothing, or
-
-  // untested code, may be bugged
-  unsigned char a[8];
-  unsigned char b[8];
-  unsigned char top,bottom;
-  for(j=0;j<1;j++){
-    for(i=0;i<4;i++){
-      top=   a[8*j+i];
-      bottom=a[8*j+4+i];
-      a[8*j+i]=   (top&0xf0)    |((bottom&0xf0)>>4);
-      a[8*j+4+i]=((top&0x0f)<<4)| (bottom&0x0f);
-    }
-  }
-  for(j=0;j<2;j++){
-    for(i=0;i<2;i++){
-      top=   a[4*j+i];
-      bottom=a[4*j+2+i];
-      a[4*j+i]  = (top&0xcc)    |((bottom&0xcc)>>2);
-      a[4*j+2+i]=((top&0x33)<<2)| (bottom&0x33);
-    }
-  }
-  for(j=0;j<4;j++){
-    for(i=0;i<1;i++){
-      top=   a[2*j+i];
-      bottom=a[2*j+1+i];
-      a[2*j+i]  = (top&0xaa)    |((bottom&0xaa)>>1);
-      a[2*j+1+i]=((top&0x55)<<1)| (bottom&0x55);
-    }
-  }
-  for(i=0;i<8;i++) b[i]=a[i]; //easy to integrate into one of the stages above
-
-which is very fast (24 shifts, 48 and, 24 or) and has redundant loops
-and address calculations which will be optimized away by the compiler.
-It can be written as 3 nested loops but it becomes less readable and
-makes it difficult to have results in b without an extra copy. The
-compiler always unrolls heavily.
-
-The gain is much bigger when operating with 32 bit or 64 bit values (we
-are going from N^2 to Nlog(N)). This method is used for rectangular
-matrixes too (they have to be seen as square matrixes side by side).
-Warning: this code is not *endian independent* if you use ints to work
-on 4 bytes. Running it on a big endian processor will give you a
-different and strange kind of bit rotation if you don't modify masks and
-shifts.
-
-This is done in the code using int or long long int. It should be
-possible to use MMX instead of long long int and it could be faster, but
-this code doesn't cost a great fraction of the total time. There are
-problems with the shifts, as multimedia instructions do not have all
-the kinds of shift we need (SSE has none!).
-
-
-TRICK NUMBER 7: try hard to process packets together
-----------------------------------------------------
-As we are able to process many packets together, we have to avoid
-running with many slots empty. Processing one packet or 64 packets takes
-the same time if the internal parallelism is 64! So we try hard to
-aggregate packets that can be processed together; for simplicity reasons
-we don't mix packets with even and odd parity (different keys), even if
-it should be doable with a little effort. Sometimes the transition from
-even to odd parity and vice versa is not sharp, but there are sequences
-like EEEEEOEEOEEOOOO. We try to group all the E together even if there
-are O between them. This out-of-order processing complicates the
-interface to the applications a bit but saves us three or four runs with
-many empty slots.
-
-We have also logic to process together packets with a different size of
-the payload, which is not always 184 bytes. This involves sorting the
-packets by size before processing and careful operation of the 23
-iteration loop to exclude some packets from the calculations. It is not
-CPU heavy.
-
-Packets with payload <8 bytes are identical before and after decryption
-(!), so we skip them without using a slot. (According to DVB specs this
-kind of packet shouldn't happen, but such packets are used in the real world.)
-
-
-TRICK NUMBER 8: try to avoid doing the same thing many times
-------------------------------------------------------------
-Some calculations related to keys are only done when the keys are set,
-then all the values depending on keys are stored in a convenient form
-and used every time we convert a group of packets.
-
-
-TRICK NUMBER 9: compiler
-------------------------
-
-Compilers have a lot of optimization options. I used -march to target my
-CPU and played with unusual options. In particular
-  "--param max-unrolled-insns=500"
-does a good job on the tricky table lookup in the block cypher. Bigger
-values unroll too much somewhere and lose speed. All the testing has
-been done on an AthlonXP CPU with a specific version of gcc
-  gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
-Other combinations of CPU and compiler can give different speeds. If the
-compiler is not able to simplify the group and batch structures and
-stores everything in memory instead of registers, performance will be
-low.
-
-Absolutely use a good compiler!
-
-Note: the same code can be compiled in C or C++ mode. g++ gives a 3%
-speed increase compared to gcc (I suppose some stricter constraint on
-array and pointers in C++ mode gives the optimizer more freedom).
-
-
-TRICK NUMBER a: a lot of brain work
------------------------------------
-The code started as a very slow but correct implementation and was then
-tweaked for months with a lot of experimentation and by adding all the
-good ideas one after another to achieve little steps toward the best
-speed possible, while continuously testing that nothing had been broken.
-
-Many hours were spent on this code.
-
-Enjoy the result.
diff --git a/contrib/sasc-ng/FFdecsa/fftable.h b/contrib/sasc-ng/FFdecsa/fftable.h
deleted file mode 100644
index ed6345f..0000000
--- a/contrib/sasc-ng/FFdecsa/fftable.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- *               2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef FFTABLE_H
-#define FFTABLE_H
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data) /* store: copy one long long read from data into the g-th long long slot of tab */
-{
-#if 0 /* disabled variant: two int copies into int indices 2*g and 2*g+1 of tab */
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-#else
-  *(((long long *)tab)+g)=*((long long *)data); /* NOTE(review): accesses unsigned char buffers through long long pointers; presumably callers guarantee alignment - confirm */
-#endif
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g) /* load: copy the ints at int indices 2*g and 2*g+1 of tab into data */
-{
-#if 1 /* active variant: two int copies */
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-#else /* disabled variant: a single long long copy from slot g */
-  *((long long *)data)=*(((long long *)tab)+g);
-#endif
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g) /* XOR n bytes, starting at byte offset 8*g of tab, into data[0..n-1] */
-{
-  for(int j=0;j<n;j++) *(data+j)^=*(tab+8*g+j); /* byte-at-a-time; does nothing when n<=0 */
-}
-
-#undef XOREQ_BEST_BY /* remove any earlier macro of this name so the function below can use it */
-static inline void XOREQ_BEST_BY(unsigned char *d, unsigned char *s) /* calls XOR_BEST_BY with d as both the first and second argument */
-{
-	XOR_BEST_BY(d, d, s); /* presumably d = d ^ s; XOR_BEST_BY is not defined in this header - verify against its definition */
-}
-
-#endif //FFTABLE_H 
diff --git a/contrib/sasc-ng/FFdecsa/logic/Makefile b/contrib/sasc-ng/FFdecsa/logic/Makefile
deleted file mode 100644
index 3e4aed1..0000000
--- a/contrib/sasc-ng/FFdecsa/logic/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-all: logic # default goal: build the logic executable
-
-logic: logic.o # link step; the compiler is hard-coded as gcc in the recipe
-	gcc -o logic logic.o
-
-logic.o: logic.c # compile step; the recipe hard-codes -O3 and -march=athlon-xp
-	gcc -O3 -march=athlon-xp -c logic.c
-
-clean: # removes the executable and all *.o files; rm has no -f, so it fails when they are absent
-	rm logic *.o
diff --git a/contrib/sasc-ng/FFdecsa/logic/logic.c b/contrib/sasc-ng/FFdecsa/logic/logic.c
deleted file mode 100644
index 8be38dc..0000000
--- a/contrib/sasc-ng/FFdecsa/logic/logic.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/* logic -- synthetize logic functions with 4 inputs
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-
-
-/* Can we use negated inputs? */
-#define noNEGATEDTOO
-
-
-#include <stdio.h>
-
-
-/*
- * abcd
- */
-
-#define BINARY(b15,b14,b13,b12,b11,b10,b9,b8,b7,b6,b5,b4,b3,b2,b1,b0) \
-  ((b15)<<15)|((b14)<<14)|((b13)<<13)|((b12)<<12)| \
-  ((b11)<<11)|((b10)<<10)|((b9) << 9)|((b8) << 8)| \
-  ((b7) << 7)|((b6) << 6)|((b5) << 5)|((b4) << 4)| \
-  ((b3) << 3)|((b2) << 2)|((b1) << 1)|((b0) << 0)
-
-struct fun{
-  int level;
-  int op_type;
-  int op1;
-  int op2;
-};
-
-struct fun db[65536];
-int n_fun;
-
-#define LEVEL_ALOT 1000000
-
-#define OP_FALSE 0
-#define OP_TRUE  1
-#define OP_SRC   2
-#define OP_AND   3
-#define OP_OR    4
-#define OP_XOR   5
-
-#define SRC_A 10
-#define SRC_B 20
-#define SRC_C 30
-#define SRC_D 40
-#define SRC_AN 11
-#define SRC_BN 21
-#define SRC_CN 31
-#define SRC_DN 41
-
-void dump_element_prefix(int);
-void dump_element_infix(int);
-
-int main(void){
-  int i,j;
-  int l,p1,p2;
-  int candidate;
-  int max_p2_lev;
-  
-  for(i=0;i<65536;i++){
-    db[i].level=LEVEL_ALOT;
-  }
-  n_fun=0;
-
-  db[0].level=0;
-  db[0].op_type=OP_FALSE;
-  n_fun++;
-
-  db[65535].level=0;
-  db[65535].op_type=OP_TRUE;
-  n_fun++;
-
-  db[BINARY(0,0,0,0, 0,0,0,0,  1,1,1,1, 1,1,1,1)].level=0;
-  db[BINARY(0,0,0,0, 0,0,0,0,  1,1,1,1, 1,1,1,1)].op_type=OP_SRC;
-  db[BINARY(0,0,0,0, 0,0,0,0,  1,1,1,1, 1,1,1,1)].op1=SRC_A;
-  n_fun++;
-
-  db[BINARY(0,0,0,0, 1,1,1,1,  0,0,0,0, 1,1,1,1)].level=0;
-  db[BINARY(0,0,0,0, 1,1,1,1,  0,0,0,0, 1,1,1,1)].op_type=OP_SRC;
-  db[BINARY(0,0,0,0, 1,1,1,1,  0,0,0,0, 1,1,1,1)].op1=SRC_B;
-  n_fun++;
-
-  db[BINARY(0,0,1,1, 0,0,1,1,  0,0,1,1, 0,0,1,1)].level=0;
-  db[BINARY(0,0,1,1, 0,0,1,1,  0,0,1,1, 0,0,1,1)].op_type=OP_SRC;
-  db[BINARY(0,0,1,1, 0,0,1,1,  0,0,1,1, 0,0,1,1)].op1=SRC_C;
-  n_fun++;
-
-  db[BINARY(0,1,0,1, 0,1,0,1,  0,1,0,1, 0,1,0,1)].level=0;
-  db[BINARY(0,1,0,1, 0,1,0,1,  0,1,0,1, 0,1,0,1)].op_type=OP_SRC;
-  db[BINARY(0,1,0,1, 0,1,0,1,  0,1,0,1, 0,1,0,1)].op1=SRC_D;
-  n_fun++;
-#ifdef NEGATEDTOO
-  db[BINARY(1,1,1,1, 1,1,1,1,  0,0,0,0, 0,0,0,0)].level=0;
-  db[BINARY(1,1,1,1, 1,1,1,1,  0,0,0,0, 0,0,0,0)].op_type=OP_SRC;
-  db[BINARY(1,1,1,1, 1,1,1,1,  0,0,0,0, 0,0,0,0)].op1=SRC_AN;
-  n_fun++;
-
-  db[BINARY(1,1,1,1, 0,0,0,0,  1,1,1,1, 0,0,0,0)].level=0;
-  db[BINARY(1,1,1,1, 0,0,0,0,  1,1,1,1, 0,0,0,0)].op_type=OP_SRC;
-  db[BINARY(1,1,1,1, 0,0,0,0,  1,1,1,1, 0,0,0,0)].op1=SRC_BN;
-  n_fun++;
-
-  db[BINARY(1,1,0,0, 1,1,0,0,  1,1,0,0, 1,1,0,0)].level=0;
-  db[BINARY(1,1,0,0, 1,1,0,0,  1,1,0,0, 1,1,0,0)].op_type=OP_SRC;
-  db[BINARY(1,1,0,0, 1,1,0,0,  1,1,0,0, 1,1,0,0)].op1=SRC_CN;
-  n_fun++;
-
-  db[BINARY(1,0,1,0, 1,0,1,0,  1,0,1,0, 1,0,1,0)].level=0;
-  db[BINARY(1,0,1,0, 1,0,1,0,  1,0,1,0, 1,0,1,0)].op_type=OP_SRC;
-  db[BINARY(1,0,1,0, 1,0,1,0,  1,0,1,0, 1,0,1,0)].op1=SRC_DN;
-  n_fun++;
-#endif
-
-  for(l=0;l<100;l++){
-    printf("calculating level %i\n",l);
-    for(p1=1;p1<65536;p1++){
-      if(db[p1].level==LEVEL_ALOT) continue;
-      max_p2_lev=l-db[p1].level-1;
-      for(p2=p1+1;p2<65536;p2++){
-        if(db[p2].level>max_p2_lev) continue;
-
-        candidate=p1&p2;
-        if(db[candidate].level==LEVEL_ALOT){
-          //found new
-          db[candidate].level=db[p1].level+db[p2].level+1;
-          db[candidate].op_type=OP_AND;
-          db[candidate].op1=p1;
-          db[candidate].op2=p2;
-          n_fun++;
-	}
-
-        candidate=p1|p2;
-        if(db[candidate].level==LEVEL_ALOT){
-          //found new
-          db[candidate].level=db[p1].level+db[p2].level+1;
-          db[candidate].op_type=OP_OR;
-          db[candidate].op1=p1;
-          db[candidate].op2=p2;
-          n_fun++;
-	}
-
-        candidate=p1^p2;
-        if(db[candidate].level==LEVEL_ALOT){
-          //found new
-          db[candidate].level=db[p1].level+db[p2].level+1;
-          db[candidate].op_type=OP_XOR;
-          db[candidate].op1=p1;
-          db[candidate].op2=p2;
-          n_fun++;
-	}
-
-      }
-    }
-    printf("num fun=%i\n\n",n_fun);
-    fflush(stdout);
-    if(n_fun>=65536) break;
-  }
-
-
-  for(i=0;i<65536;i++){
-    if(db[i].level==LEVEL_ALOT) continue;
-
-    printf("PREFIX ");
-    for(j=15;j>=0;j--){
-      printf("%i",i&(1<<j)?1:0);
-      if(j%4==0) printf(" ");
-      if(j%8==0) printf(" ");
-    }
-    printf(" : lev %2i: ",db[i].level);
-    dump_element_prefix(i);
-    printf("\n");
-
-    printf("INFIX  ");
-    for(j=15;j>=0;j--){
-      printf("%i",i&(1<<j)?1:0);
-      if(j%4==0) printf(" ");
-      if(j%8==0) printf(" ");
-    }
-    printf(" : lev %2i: ",db[i].level);
-    dump_element_infix(i);
-    printf("\n");
-  }
-  
-  return 0;
-}
-
-void dump_element_prefix(int e){
-  if(db[e].level==LEVEL_ALOT){
-    printf("PANIC!\n");
-    return;
-  };
-  switch(db[e].op_type){
-  case OP_FALSE:
-    printf("0");
-    break;
-  case OP_TRUE:
-    printf("1");
-    break;
-  case OP_SRC:
-    switch(db[e].op1){
-    case SRC_A:
-      printf("a");
-      break;
-    case SRC_B:
-      printf("b");
-      break;
-    case SRC_C:
-      printf("c");
-      break;
-    case SRC_D:
-      printf("d");
-      break;
-    case SRC_AN:
-      printf("an");
-      break;
-    case SRC_BN:
-      printf("bn");
-      break;
-    case SRC_CN:
-      printf("cn");
-      break;
-    case SRC_DN:
-      printf("dn");
-      break;
-    }
-    break;
-  case OP_AND:
-    printf("FFAND(");
-    dump_element_prefix(db[e].op1);
-    printf(",");
-    dump_element_prefix(db[e].op2);
-    printf(")");
-    break;
-  case OP_OR:
-    printf("FFOR(");
-    dump_element_prefix(db[e].op1);
-    printf(",");
-    dump_element_prefix(db[e].op2);
-    printf(")");
-    break;
-  case OP_XOR:
-    printf("FFXOR(");
-    dump_element_prefix(db[e].op1);
-    printf(",");
-    dump_element_prefix(db[e].op2);
-    printf(")");
-    break;
-  }
-}
-
-void dump_element_infix(int e){
-  if(db[e].level==LEVEL_ALOT){
-    printf("PANIC!\n");
-    return;
-  };
-  switch(db[e].op_type){
-  case OP_FALSE:
-    printf("0");
-    break;
-  case OP_TRUE:
-    printf("1");
-    break;
-  case OP_SRC:
-    switch(db[e].op1){
-    case SRC_A:
-      printf("a");
-      break;
-    case SRC_B:
-      printf("b");
-      break;
-    case SRC_C:
-      printf("c");
-      break;
-    case SRC_D:
-      printf("d");
-      break;
-    case SRC_AN:
-      printf("an");
-      break;
-    case SRC_BN:
-      printf("bn");
-      break;
-    case SRC_CN:
-      printf("cn");
-      break;
-    case SRC_DN:
-      printf("dn");
-      break;
-    }
-    break;
-  case OP_AND:
-    printf("( ");
-    dump_element_infix(db[e].op1);
-    printf("&");
-    dump_element_infix(db[e].op2);
-    printf(" )");
-    break;
-  case OP_OR:
-    printf("( ");
-    dump_element_infix(db[e].op1);
-    printf("|");
-    dump_element_infix(db[e].op2);
-    printf(" )");
-    break;
-  case OP_XOR:
-    printf("( ");
-    dump_element_infix(db[e].op1);
-    printf("^");
-    dump_element_infix(db[e].op2);
-    printf(" )");
-    break;
-  }
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_032_4char.h b/contrib/sasc-ng/FFdecsa/parallel_032_4char.h
deleted file mode 100644
index b9295f8..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_032_4char.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned char s1,s2,s3,s4;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 32
-
-group static inline FF0(){
-  group res;
-  res.s1=0x0;
-  res.s2=0x0;
-  res.s3=0x0;
-  res.s4=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=0xff;
-  res.s2=0xff;
-  res.s3=0xff;
-  res.s4=0xff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=~a.s1;
-  res.s2=~a.s2;
-  res.s3=~a.s3;
-  res.s4=~a.s4;
-  return res;
-}
-
-
-/* 64 rows of 32 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+g)=*((int *)data);
-  *(((int *)tab)+32+g)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+g);
-  *(((int *)data)+1)=*(((int *)tab)+32+g);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j);
-  }
-}
-
-struct batch_t{
-  unsigned char s1,s2,s3,s4;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 4
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=0x29;
-  res.s2=0x29;
-  res.s3=0x29;
-  res.s4=0x29;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=0x02;
-  res.s2=0x02;
-  res.s3=0x02;
-  res.s4=0x02;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=0x04;
-  res.s2=0x04;
-  res.s3=0x04;
-  res.s4=0x04;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=0x10;
-  res.s2=0x10;
-  res.s3=0x10;
-  res.s4=0x10;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=0x40;
-  res.s2=0x40;
-  res.s3=0x40;
-  res.s4=0x40;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=0x80;
-  res.s2=0x80;
-  res.s3=0x80;
-  res.s4=0x80;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=a.s1<<n;
-  res.s2=a.s2<<n;
-  res.s3=a.s3<<n;
-  res.s4=a.s4<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=a.s1>>n;
-  res.s2=a.s2>>n;
-  res.s3=a.s3>>n;
-  res.s4=a.s4>>n;
-  return res;
-}
-
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_032_4charA.h b/contrib/sasc-ng/FFdecsa/parallel_032_4charA.h
deleted file mode 100644
index a8f295b..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_032_4charA.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned char s1[4];
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 32
-
-group static inline FF0(){
-  group res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0xff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]&b.s1[i];
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]|b.s1[i];
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]^b.s1[i];
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=~a.s1[i];
-  return res;
-}
-
-
-/* 64 rows of 32 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+g)=*((int *)data);
-  *(((int *)tab)+32+g)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+g);
-  *(((int *)data)+1)=*(((int *)tab)+32+g);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j);
-  }
-}
-
-struct batch_t{
-  unsigned char s1[4];
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 4
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]&b.s1[i];
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]|b.s1[i];
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]^b.s1[i];
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x29;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x02;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x04;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x10;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x40;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=0x80;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  int i;
-  for(i=0;i<4;i++) res.s1[i]=a.s1[i]>>n;
-  return res;
-}
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_032_int.h b/contrib/sasc-ng/FFdecsa/parallel_032_int.h
deleted file mode 100644
index a21fe31..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_032_int.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "parallel_std_def.h"
-
-typedef unsigned int group;
-#define GROUP_PARALLELISM 32
-#define FF0()      0x0
-#define FF1()      0xffffffff
-
-/* 64 rows of 32 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+g)=*((int *)data);
-  *(((int *)tab)+32+g)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+g);
-  *(((int *)data)+1)=*(((int *)tab)+32+g);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j);
-  }
-}
-
-typedef unsigned int batch;
-#define BYTES_PER_BATCH 4
-#define B_FFN_ALL_29() 0x29292929
-#define B_FFN_ALL_02() 0x02020202
-#define B_FFN_ALL_04() 0x04040404
-#define B_FFN_ALL_10() 0x10101010
-#define B_FFN_ALL_40() 0x40404040
-#define B_FFN_ALL_80() 0x80808080
-
-#define M_EMPTY()
diff --git a/contrib/sasc-ng/FFdecsa/parallel_064_2int.h b/contrib/sasc-ng/FFdecsa/parallel_064_2int.h
deleted file mode 100644
index ffe331a..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_064_2int.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned int s1;
-  unsigned int s2;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 64
-
-group static inline FF0(){
-  group res;
-  res.s1=0x0;
-  res.s2=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=0xffffffff;
-  res.s2=0xffffffff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=~a.s1;
-  res.s2=~a.s2;
-  return res;
-}
-
-
-/* 64 rows of 64 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-struct batch_t{
-  unsigned int s1;
-  unsigned int s2;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 8
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=0x29292929;
-  res.s2=0x29292929;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=0x02020202;
-  res.s2=0x02020202;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=0x04040404;
-  res.s2=0x04040404;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=0x10101010;
-  res.s2=0x10101010;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=0x40404040;
-  res.s2=0x40404040;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=0x80808080;
-  res.s2=0x80808080;
-  return res;
-}
-
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=a.s1<<n;
-  res.s2=a.s2<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=a.s1>>n;
-  res.s2=a.s2>>n;
-  return res;
-}
-
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_064_8char.h b/contrib/sasc-ng/FFdecsa/parallel_064_8char.h
deleted file mode 100644
index 956c980..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_064_8char.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned char s1,s2,s3,s4,s5,s6,s7,s8;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 64
-
-group static inline FF0(){
-  group res;
-  res.s1=0x0;
-  res.s2=0x0;
-  res.s3=0x0;
-  res.s4=0x0;
-  res.s5=0x0;
-  res.s6=0x0;
-  res.s7=0x0;
-  res.s8=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=0xff;
-  res.s2=0xff;
-  res.s3=0xff;
-  res.s4=0xff;
-  res.s5=0xff;
-  res.s6=0xff;
-  res.s7=0xff;
-  res.s8=0xff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  res.s5=a.s5&b.s5;
-  res.s6=a.s6&b.s6;
-  res.s7=a.s7&b.s7;
-  res.s8=a.s8&b.s8;
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  res.s5=a.s5|b.s5;
-  res.s6=a.s6|b.s6;
-  res.s7=a.s7|b.s7;
-  res.s8=a.s8|b.s8;
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  res.s5=a.s5^b.s5;
-  res.s6=a.s6^b.s6;
-  res.s7=a.s7^b.s7;
-  res.s8=a.s8^b.s8;
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=~a.s1;
-  res.s2=~a.s2;
-  res.s3=~a.s3;
-  res.s4=~a.s4;
-  res.s5=~a.s5;
-  res.s6=~a.s6;
-  res.s7=~a.s7;
-  res.s8=~a.s8;
-  return res;
-}
-
-
-/* 64 rows of 64 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-struct batch_t{
-  unsigned char s1,s2,s3,s4,s5,s6,s7,s8;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 8
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  res.s5=a.s5&b.s5;
-  res.s6=a.s6&b.s6;
-  res.s7=a.s7&b.s7;
-  res.s8=a.s8&b.s8;
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  res.s5=a.s5|b.s5;
-  res.s6=a.s6|b.s6;
-  res.s7=a.s7|b.s7;
-  res.s8=a.s8|b.s8;
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  res.s5=a.s5^b.s5;
-  res.s6=a.s6^b.s6;
-  res.s7=a.s7^b.s7;
-  res.s8=a.s8^b.s8;
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=0x29;
-  res.s2=0x29;
-  res.s3=0x29;
-  res.s4=0x29;
-  res.s5=0x29;
-  res.s6=0x29;
-  res.s7=0x29;
-  res.s8=0x29;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=0x02;
-  res.s2=0x02;
-  res.s3=0x02;
-  res.s4=0x02;
-  res.s5=0x02;
-  res.s6=0x02;
-  res.s7=0x02;
-  res.s8=0x02;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=0x04;
-  res.s2=0x04;
-  res.s3=0x04;
-  res.s4=0x04;
-  res.s5=0x04;
-  res.s6=0x04;
-  res.s7=0x04;
-  res.s8=0x04;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=0x10;
-  res.s2=0x10;
-  res.s3=0x10;
-  res.s4=0x10;
-  res.s5=0x10;
-  res.s6=0x10;
-  res.s7=0x10;
-  res.s8=0x10;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=0x40;
-  res.s2=0x40;
-  res.s3=0x40;
-  res.s4=0x40;
-  res.s5=0x40;
-  res.s6=0x40;
-  res.s7=0x40;
-  res.s8=0x40;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=0x80;
-  res.s2=0x80;
-  res.s3=0x80;
-  res.s4=0x80;
-  res.s5=0x80;
-  res.s6=0x80;
-  res.s7=0x80;
-  res.s8=0x80;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=a.s1<<n;
-  res.s2=a.s2<<n;
-  res.s3=a.s3<<n;
-  res.s4=a.s4<<n;
-  res.s5=a.s5<<n;
-  res.s6=a.s6<<n;
-  res.s7=a.s7<<n;
-  res.s8=a.s8<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=a.s1>>n;
-  res.s2=a.s2>>n;
-  res.s3=a.s3>>n;
-  res.s4=a.s4>>n;
-  res.s5=a.s5>>n;
-  res.s6=a.s6>>n;
-  res.s7=a.s7>>n;
-  res.s8=a.s8>>n;
-  return res;
-}
-
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_064_8charA.h b/contrib/sasc-ng/FFdecsa/parallel_064_8charA.h
deleted file mode 100644
index b99490b..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_064_8charA.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned char s1[8];
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 64
-
-group static inline FF0(){
-  group res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0xff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]&b.s1[i];
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]|b.s1[i];
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]^b.s1[i];
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=~a.s1[i];
-  return res;
-}
-
-
-/* 64 rows of 64 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-struct batch_t{
-  unsigned char s1[8];
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 8
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]&b.s1[i];
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]|b.s1[i];
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]^b.s1[i];
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x29;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x02;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x04;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x10;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x40;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=0x80;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  int i;
-  for(i=0;i<8;i++) res.s1[i]=a.s1[i]>>n;
-  return res;
-}
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_064_long.h b/contrib/sasc-ng/FFdecsa/parallel_064_long.h
deleted file mode 100644
index 09f7b95..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_064_long.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- *               2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "parallel_std_def.h"
-
-typedef unsigned long long group;
-#define GROUP_PARALLELISM 64
-#define FF0() 0x0ULL
-#define FF1() 0xffffffffffffffffULL
-
-typedef unsigned long long batch;
-#define BYTES_PER_BATCH 8
-#define B_FFN_ALL_29() 0x2929292929292929ULL
-#define B_FFN_ALL_02() 0x0202020202020202ULL
-#define B_FFN_ALL_04() 0x0404040404040404ULL
-#define B_FFN_ALL_10() 0x1010101010101010ULL
-#define B_FFN_ALL_40() 0x4040404040404040ULL
-#define B_FFN_ALL_80() 0x8080808080808080ULL
-
-#define M_EMPTY()
-
-#include "fftable.h"
diff --git a/contrib/sasc-ng/FFdecsa/parallel_064_mmx.h b/contrib/sasc-ng/FFdecsa/parallel_064_mmx.h
deleted file mode 100644
index 3979233..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_064_mmx.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- *               2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <mmintrin.h>
-
-#define MEMALIGN __attribute__((aligned(16)))
-
-union __u64 {
-    unsigned int u[2];
-    __m64 v;
-};
-
-static const union __u64 ff0 = {{0x00000000U, 0x00000000U}};
-static const union __u64 ff1 = {{0xffffffffU, 0xffffffffU}};
-
-typedef __m64 group;
-#define GROUP_PARALLELISM 64
-#define FF0()      ff0.v
-#define FF1()      ff1.v
-#define FFAND(a,b) _mm_and_si64((a),(b))
-#define FFOR(a,b)  _mm_or_si64((a),(b))
-#define FFXOR(a,b) _mm_xor_si64((a),(b))
-#define FFNOT(a)   _mm_xor_si64((a),FF1())
-
-/* 64 rows of 64 bits */
-
-static const union __u64 ff29 = {{0x29292929U, 0x29292929U}};
-static const union __u64 ff02 = {{0x02020202U, 0x02020202U}};
-static const union __u64 ff04 = {{0x04040404U, 0x04040404U}};
-static const union __u64 ff10 = {{0x10101010U, 0x10101010U}};
-static const union __u64 ff40 = {{0x40404040U, 0x40404040U}};
-static const union __u64 ff80 = {{0x80808080U, 0x80808080U}};
-
-typedef __m64 batch;
-#define BYTES_PER_BATCH 8
-#define B_FFAND(a,b) FFAND((a),(b))
-#define B_FFOR(a,b)  FFOR((a),(b))
-#define B_FFXOR(a,b) FFXOR((a),(b))
-#define B_FFN_ALL_29() ff29.v
-#define B_FFN_ALL_02() ff02.v
-#define B_FFN_ALL_04() ff04.v
-#define B_FFN_ALL_10() ff10.v
-#define B_FFN_ALL_40() ff40.v
-#define B_FFN_ALL_80() ff80.v
-#define B_FFSH8L(a,n) _mm_slli_si64((a),(n))
-#define B_FFSH8R(a,n) _mm_srli_si64((a),(n))
-
-#define M_EMPTY() _mm_empty()
-
-
-#undef XOR_8_BY
-#define XOR_8_BY(d,s1,s2)    do { *(__m64*)d = _mm_xor_si64(*(__m64*)(s1), *(__m64*)(s2)); } while(0)
-
-#undef XOREQ_8_BY
-#define XOREQ_8_BY(d,s)      XOR_8_BY(d, d, s)
-
-#undef COPY_8_BY
-#define COPY_8_BY(d,s)       do { *(__m64 *)(d) = *(__m64 *)(s); } while(0)
-
-#undef BEST_SPAN
-#define BEST_SPAN            8
-
-#undef XOR_BEST_BY
-#define XOR_BEST_BY(d,s1,s2) XOR_8_BY(d,s1,s2)
-
-#include "fftable.h"
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_16char.h b/contrib/sasc-ng/FFdecsa/parallel_128_16char.h
deleted file mode 100644
index ed28c61..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_16char.h
+++ /dev/null
@@ -1,411 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned char s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 128
-
-group static inline FF0(){
-  group res;
-  res.s1=0x0;
-  res.s2=0x0;
-  res.s3=0x0;
-  res.s4=0x0;
-  res.s5=0x0;
-  res.s6=0x0;
-  res.s7=0x0;
-  res.s8=0x0;
-  res.s9=0x0;
-  res.s10=0x0;
-  res.s11=0x0;
-  res.s12=0x0;
-  res.s13=0x0;
-  res.s14=0x0;
-  res.s15=0x0;
-  res.s16=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=0xff;
-  res.s2=0xff;
-  res.s3=0xff;
-  res.s4=0xff;
-  res.s5=0xff;
-  res.s6=0xff;
-  res.s7=0xff;
-  res.s8=0xff;
-  res.s9=0xff;
-  res.s10=0xff;
-  res.s11=0xff;
-  res.s12=0xff;
-  res.s13=0xff;
-  res.s14=0xff;
-  res.s15=0xff;
-  res.s16=0xff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  res.s5=a.s5&b.s5;
-  res.s6=a.s6&b.s6;
-  res.s7=a.s7&b.s7;
-  res.s8=a.s8&b.s8;
-  res.s9=a.s9&b.s9;
-  res.s10=a.s10&b.s10;
-  res.s11=a.s11&b.s11;
-  res.s12=a.s12&b.s12;
-  res.s13=a.s13&b.s13;
-  res.s14=a.s14&b.s14;
-  res.s15=a.s15&b.s15;
-  res.s16=a.s16&b.s16;
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  res.s5=a.s5|b.s5;
-  res.s6=a.s6|b.s6;
-  res.s7=a.s7|b.s7;
-  res.s8=a.s8|b.s8;
-  res.s9=a.s9|b.s9;
-  res.s10=a.s10|b.s10;
-  res.s11=a.s11|b.s11;
-  res.s12=a.s12|b.s12;
-  res.s13=a.s13|b.s13;
-  res.s14=a.s14|b.s14;
-  res.s15=a.s15|b.s15;
-  res.s16=a.s16|b.s16;
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  res.s5=a.s5^b.s5;
-  res.s6=a.s6^b.s6;
-  res.s7=a.s7^b.s7;
-  res.s8=a.s8^b.s8;
-  res.s9=a.s9^b.s9;
-  res.s10=a.s10^b.s10;
-  res.s11=a.s11^b.s11;
-  res.s12=a.s12^b.s12;
-  res.s13=a.s13^b.s13;
-  res.s14=a.s14^b.s14;
-  res.s15=a.s15^b.s15;
-  res.s16=a.s16^b.s16;
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=~a.s1;
-  res.s2=~a.s2;
-  res.s3=~a.s3;
-  res.s4=~a.s4;
-  res.s5=~a.s5;
-  res.s6=~a.s6;
-  res.s7=~a.s7;
-  res.s8=~a.s8;
-  res.s9=~a.s9;
-  res.s10=~a.s10;
-  res.s11=~a.s11;
-  res.s12=~a.s12;
-  res.s13=~a.s13;
-  res.s14=~a.s14;
-  res.s15=~a.s15;
-  res.s16=~a.s16;
-  return res;
-}
-
-
-/* 64 rows of 128 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-
-struct batch_t{
-  unsigned char s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 16
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  res.s5=a.s5&b.s5;
-  res.s6=a.s6&b.s6;
-  res.s7=a.s7&b.s7;
-  res.s8=a.s8&b.s8;
-  res.s9=a.s9&b.s9;
-  res.s10=a.s10&b.s10;
-  res.s11=a.s11&b.s11;
-  res.s12=a.s12&b.s12;
-  res.s13=a.s13&b.s13;
-  res.s14=a.s14&b.s14;
-  res.s15=a.s15&b.s15;
-  res.s16=a.s16&b.s16;
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  res.s5=a.s5|b.s5;
-  res.s6=a.s6|b.s6;
-  res.s7=a.s7|b.s7;
-  res.s8=a.s8|b.s8;
-  res.s9=a.s9|b.s9;
-  res.s10=a.s10|b.s10;
-  res.s11=a.s11|b.s11;
-  res.s12=a.s12|b.s12;
-  res.s13=a.s13|b.s13;
-  res.s14=a.s14|b.s14;
-  res.s15=a.s15|b.s15;
-  res.s16=a.s16|b.s16;
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  res.s5=a.s5^b.s5;
-  res.s6=a.s6^b.s6;
-  res.s7=a.s7^b.s7;
-  res.s8=a.s8^b.s8;
-  res.s9=a.s9^b.s9;
-  res.s10=a.s10^b.s10;
-  res.s11=a.s11^b.s11;
-  res.s12=a.s12^b.s12;
-  res.s13=a.s13^b.s13;
-  res.s14=a.s14^b.s14;
-  res.s15=a.s15^b.s15;
-  res.s16=a.s16^b.s16;
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=0x29;
-  res.s2=0x29;
-  res.s3=0x29;
-  res.s4=0x29;
-  res.s5=0x29;
-  res.s6=0x29;
-  res.s7=0x29;
-  res.s8=0x29;
-  res.s9=0x29;
-  res.s10=0x29;
-  res.s11=0x29;
-  res.s12=0x29;
-  res.s13=0x29;
-  res.s14=0x29;
-  res.s15=0x29;
-  res.s16=0x29;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=0x02;
-  res.s2=0x02;
-  res.s3=0x02;
-  res.s4=0x02;
-  res.s5=0x02;
-  res.s6=0x02;
-  res.s7=0x02;
-  res.s8=0x02;
-  res.s9=0x02;
-  res.s10=0x02;
-  res.s11=0x02;
-  res.s12=0x02;
-  res.s13=0x02;
-  res.s14=0x02;
-  res.s15=0x02;
-  res.s16=0x02;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=0x04;
-  res.s2=0x04;
-  res.s3=0x04;
-  res.s4=0x04;
-  res.s5=0x04;
-  res.s6=0x04;
-  res.s7=0x04;
-  res.s8=0x04;
-  res.s9=0x04;
-  res.s10=0x04;
-  res.s11=0x04;
-  res.s12=0x04;
-  res.s13=0x04;
-  res.s14=0x04;
-  res.s15=0x04;
-  res.s16=0x04;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=0x10;
-  res.s2=0x10;
-  res.s3=0x10;
-  res.s4=0x10;
-  res.s5=0x10;
-  res.s6=0x10;
-  res.s7=0x10;
-  res.s8=0x10;
-  res.s9=0x10;
-  res.s10=0x10;
-  res.s11=0x10;
-  res.s12=0x10;
-  res.s13=0x10;
-  res.s14=0x10;
-  res.s15=0x10;
-  res.s16=0x10;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=0x40;
-  res.s2=0x40;
-  res.s3=0x40;
-  res.s4=0x40;
-  res.s5=0x40;
-  res.s6=0x40;
-  res.s7=0x40;
-  res.s8=0x40;
-  res.s9=0x40;
-  res.s10=0x40;
-  res.s11=0x40;
-  res.s12=0x40;
-  res.s13=0x40;
-  res.s14=0x40;
-  res.s15=0x40;
-  res.s16=0x40;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=0x80;
-  res.s2=0x80;
-  res.s3=0x80;
-  res.s4=0x80;
-  res.s5=0x80;
-  res.s6=0x80;
-  res.s7=0x80;
-  res.s8=0x80;
-  res.s9=0x80;
-  res.s10=0x80;
-  res.s11=0x80;
-  res.s12=0x80;
-  res.s13=0x80;
-  res.s14=0x80;
-  res.s15=0x80;
-  res.s16=0x80;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=a.s1<<n;
-  res.s2=a.s2<<n;
-  res.s3=a.s3<<n;
-  res.s4=a.s4<<n;
-  res.s5=a.s5<<n;
-  res.s6=a.s6<<n;
-  res.s7=a.s7<<n;
-  res.s8=a.s8<<n;
-  res.s9=a.s9<<n;
-  res.s10=a.s10<<n;
-  res.s11=a.s11<<n;
-  res.s12=a.s12<<n;
-  res.s13=a.s13<<n;
-  res.s14=a.s14<<n;
-  res.s15=a.s15<<n;
-  res.s16=a.s16<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=a.s1>>n;
-  res.s2=a.s2>>n;
-  res.s3=a.s3>>n;
-  res.s4=a.s4>>n;
-  res.s5=a.s5>>n;
-  res.s6=a.s6>>n;
-  res.s7=a.s7>>n;
-  res.s8=a.s8>>n;
-  res.s9=a.s9>>n;
-  res.s10=a.s10>>n;
-  res.s11=a.s11>>n;
-  res.s12=a.s12>>n;
-  res.s13=a.s13>>n;
-  res.s14=a.s14>>n;
-  res.s15=a.s15>>n;
-  res.s16=a.s16>>n;
-  return res;
-}
-
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_16charA.h b/contrib/sasc-ng/FFdecsa/parallel_128_16charA.h
deleted file mode 100644
index 2a0daa1..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_16charA.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned char s1[16];
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 128
-
-group static inline FF0(){
-  group res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0xff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]&b.s1[i];
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]|b.s1[i];
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]^b.s1[i];
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=~a.s1[i];
-  return res;
-}
-
-
-/* 64 rows of 128 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-
-struct batch_t{
-  unsigned char s1[16];
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 16
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]&b.s1[i];
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]|b.s1[i];
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]^b.s1[i];
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x29;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x02;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x04;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x10;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x40;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=0x80;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  int i;
-  for(i=0;i<16;i++) res.s1[i]=a.s1[i]>>n;
-  return res;
-}
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_2long.h b/contrib/sasc-ng/FFdecsa/parallel_128_2long.h
deleted file mode 100644
index 1a3bdd9..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_2long.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned long long int s1;
-  unsigned long long int s2;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 128
-
-group static inline FF0(){
-  group res;
-  res.s1=0x0ULL;
-  res.s2=0x0ULL;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=0xffffffffffffffffULL;
-  res.s2=0xffffffffffffffffULL;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=~a.s1;
-  res.s2=~a.s2;
-  return res;
-}
-
-
-/* 64 rows of 128 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-
-struct batch_t{
-  unsigned long long int s1;
-  unsigned long long int s2;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 16
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=0x2929292929292929ULL;
-  res.s2=0x2929292929292929ULL;
-  return res;
-}
-
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=0x0202020202020202ULL;
-  res.s2=0x0202020202020202ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=0x0404040404040404ULL;
-  res.s2=0x0404040404040404ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=0x1010101010101010ULL;
-  res.s2=0x1010101010101010ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=0x4040404040404040ULL;
-  res.s2=0x4040404040404040ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=0x8080808080808080ULL;
-  res.s2=0x8080808080808080ULL;
-  return res;
-}
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=a.s1<<n;
-  res.s2=a.s2<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=a.s1>>n;
-  res.s2=a.s2>>n;
-  return res;
-}
-
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_2mmx.h b/contrib/sasc-ng/FFdecsa/parallel_128_2mmx.h
deleted file mode 100644
index 4afb7a7..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_2mmx.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include <mmintrin.h>
-
-#define MEMALIGN __attribute__((aligned(16)))
-
-struct group_t{
-  __m64 s1,s2;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 128
-
-group static inline FF0(){
-  group res;
-  res.s1=(__m64)0x0ULL;
-  res.s2=(__m64)0x0ULL;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=(__m64)0xffffffffffffffffULL;
-  res.s2=(__m64)0xffffffffffffffffULL;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=_m_pand(a.s1,b.s1);
-  res.s2=_m_pand(a.s2,b.s2);
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=_m_por(a.s1,b.s1);
-  res.s2=_m_por(a.s2,b.s2);
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=_m_pxor(a.s1,b.s1);
-  res.s2=_m_pxor(a.s2,b.s2);
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=_m_pxor(a.s1,FF1().s1);
-  res.s2=_m_pxor(a.s2,FF1().s2);
-  return res;
-}
-
-
-/* 64 rows of 128 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-
-struct batch_t{
-  __m64 s1,s2;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 16
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=_m_pand(a.s1,b.s1);
-  res.s2=_m_pand(a.s2,b.s2);
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=_m_por(a.s1,b.s1);
-  res.s2=_m_por(a.s2,b.s2);
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=_m_pxor(a.s1,b.s1);
-  res.s2=_m_pxor(a.s2,b.s2);
-  return res;
-}
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=(__m64)0x2929292929292929ULL;
-  res.s2=(__m64)0x2929292929292929ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=(__m64)0x0202020202020202ULL;
-  res.s2=(__m64)0x0202020202020202ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=(__m64)0x0404040404040404ULL;
-  res.s2=(__m64)0x0404040404040404ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=(__m64)0x1010101010101010ULL;
-  res.s2=(__m64)0x1010101010101010ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=(__m64)0x4040404040404040ULL;
-  res.s2=(__m64)0x4040404040404040ULL;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=(__m64)0x8080808080808080ULL;
-  res.s2=(__m64)0x8080808080808080ULL;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=_m_psllqi(a.s1,n);
-  res.s2=_m_psllqi(a.s2,n);
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=_m_psrlqi(a.s1,n);
-  res.s2=_m_psrlqi(a.s2,n);
-  return res;
-}
-
-void static inline M_EMPTY(void){
-  _m_empty();
-}
-
-
-#undef XOR_8_BY
-#define XOR_8_BY(d,s1,s2)    do{ __m64 *pd=(__m64 *)(d), *ps1=(__m64 *)(s1), *ps2=(__m64 *)(s2); \
-                                 *pd = _m_pxor( *ps1 , *ps2 ); }while(0)
-
-#undef XOREQ_8_BY
-#define XOREQ_8_BY(d,s)      do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
-                                 *pd = _m_pxor( *ps, *pd ); }while(0)
-
-#undef COPY_8_BY
-#define COPY_8_BY(d,s)       do{ __m64 *pd=(__m64 *)(d), *ps=(__m64 *)(s); \
-                                 *pd =  *ps; }while(0)
-
-#undef BEST_SPAN
-#define BEST_SPAN            8
-
-#undef XOR_BEST_BY
-#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0);
-
-#undef XOREQ_BEST_BY
-#define XOREQ_BEST_BY(d,s)   do{ XOREQ_8_BY(d,s); }while(0);
-
-#undef COPY_BEST_BY
-#define COPY_BEST_BY(d,s)    do{ COPY_8_BY(d,s); }while(0);
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_4int.h b/contrib/sasc-ng/FFdecsa/parallel_128_4int.h
deleted file mode 100644
index 79b95f1..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_4int.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-struct group_t{
-  unsigned int s1,s2,s3,s4;
-};
-typedef struct group_t group;
-
-#define GROUP_PARALLELISM 128
-
-group static inline FF0(){
-  group res;
-  res.s1=0x0;
-  res.s2=0x0;
-  res.s3=0x0;
-  res.s4=0x0;
-  return res;
-}
-
-group static inline FF1(){
-  group res;
-  res.s1=0xffffffff;
-  res.s2=0xffffffff;
-  res.s3=0xffffffff;
-  res.s4=0xffffffff;
-  return res;
-}
-
-group static inline FFAND(group a,group b){
-  group res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  return res;
-}
-
-group static inline FFOR(group a,group b){
-  group res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  return res;
-}
-
-group static inline FFXOR(group a,group b){
-  group res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  return res;
-}
-
-group static inline FFNOT(group a){
-  group res;
-  res.s1=~a.s1;
-  res.s2=~a.s2;
-  res.s3=~a.s3;
-  res.s4=~a.s4;
-  return res;
-}
-
-
-/* 64 rows of 128 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
-  *(((int *)tab)+2*g)=*((int *)data);
-  *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
-  *((int *)data)=*(((int *)tab)+2*g);
-  *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
-  int j;
-  for(j=0;j<n;j++){
-    *(data+j)^=*(tab+8*g+j);
-  }
-}
-
-
-struct batch_t{
-  unsigned int s1,s2,s3,s4;
-};
-typedef struct batch_t batch;
-
-#define BYTES_PER_BATCH 16
-
-batch static inline B_FFAND(batch a,batch b){
-  batch res;
-  res.s1=a.s1&b.s1;
-  res.s2=a.s2&b.s2;
-  res.s3=a.s3&b.s3;
-  res.s4=a.s4&b.s4;
-  return res;
-}
-
-batch static inline B_FFOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1|b.s1;
-  res.s2=a.s2|b.s2;
-  res.s3=a.s3|b.s3;
-  res.s4=a.s4|b.s4;
-  return res;
-}
-
-batch static inline B_FFXOR(batch a,batch b){
-  batch res;
-  res.s1=a.s1^b.s1;
-  res.s2=a.s2^b.s2;
-  res.s3=a.s3^b.s3;
-  res.s4=a.s4^b.s4;
-  return res;
-}
-
-
-batch static inline B_FFN_ALL_29(){
-  batch res;
-  res.s1=0x29292929;
-  res.s2=0x29292929;
-  res.s3=0x29292929;
-  res.s4=0x29292929;
-  return res;
-}
-batch static inline B_FFN_ALL_02(){
-  batch res;
-  res.s1=0x02020202;
-  res.s2=0x02020202;
-  res.s3=0x02020202;
-  res.s4=0x02020202;
-  return res;
-}
-batch static inline B_FFN_ALL_04(){
-  batch res;
-  res.s1=0x04040404;
-  res.s2=0x04040404;
-  res.s3=0x04040404;
-  res.s4=0x04040404;
-  return res;
-}
-batch static inline B_FFN_ALL_10(){
-  batch res;
-  res.s1=0x10101010;
-  res.s2=0x10101010;
-  res.s3=0x10101010;
-  res.s4=0x10101010;
-  return res;
-}
-batch static inline B_FFN_ALL_40(){
-  batch res;
-  res.s1=0x40404040;
-  res.s2=0x40404040;
-  res.s3=0x40404040;
-  res.s4=0x40404040;
-  return res;
-}
-batch static inline B_FFN_ALL_80(){
-  batch res;
-  res.s1=0x80808080;
-  res.s2=0x80808080;
-  res.s3=0x80808080;
-  res.s4=0x80808080;
-  return res;
-}
-
-batch static inline B_FFSH8L(batch a,int n){
-  batch res;
-  res.s1=a.s1<<n;
-  res.s2=a.s2<<n;
-  res.s3=a.s3<<n;
-  res.s4=a.s4<<n;
-  return res;
-}
-
-batch static inline B_FFSH8R(batch a,int n){
-  batch res;
-  res.s1=a.s1>>n;
-  res.s2=a.s2>>n;
-  res.s3=a.s3>>n;
-  res.s4=a.s4>>n;
-  return res;
-}
-
-
-void static inline M_EMPTY(void){
-}
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_sse.h b/contrib/sasc-ng/FFdecsa/parallel_128_sse.h
deleted file mode 100644
index a26e6b3..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_sse.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- *               2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include <xmmintrin.h>
-
-#define MEMALIGN __attribute__((aligned(16)))
-
-union __u128 {
-    unsigned int u[4];
-    __m128 v;
-};
-
-static const union __u128 ff0 = {{0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U}};
-static const union __u128 ff1 = {{0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU}};
-
-typedef __m128 group;
-#define GROUP_PARALLELISM 128
-#define FF0() ff0.v
-#define FF1() ff1.v
-#define FFAND(a,b) _mm_and_ps((a),(b))
-#define FFOR(a,b)  _mm_or_ps((a),(b))
-#define FFXOR(a,b) _mm_xor_ps((a),(b))
-#define FFNOT(a)   _mm_xor_ps((a),FF1())
-#define MALLOC(X)  _mm_malloc(X,16)
-#define FREE(X)    _mm_free(X)
-
-union __u64 {
-    unsigned int u[2];
-    __m64 v;
-};
-
-static const union __u64 ff29 = {{0x29292929U, 0x29292929U}};
-static const union __u64 ff02 = {{0x02020202U, 0x02020202U}};
-static const union __u64 ff04 = {{0x04040404U, 0x04040404U}};
-static const union __u64 ff10 = {{0x10101010U, 0x10101010U}};
-static const union __u64 ff40 = {{0x40404040U, 0x40404040U}};
-static const union __u64 ff80 = {{0x80808080U, 0x80808080U}};
-
-typedef __m64 batch;
-#define BYTES_PER_BATCH 8
-#define B_FFN_ALL_29() ff29.v
-#define B_FFN_ALL_02() ff02.v
-#define B_FFN_ALL_04() ff04.v
-#define B_FFN_ALL_10() ff10.v
-#define B_FFN_ALL_40() ff40.v
-#define B_FFN_ALL_80() ff80.v
-#define B_FFAND(a,b)  _mm_and_si64((a),(b))
-#define B_FFOR(a,b)   _mm_or_si64((a),(b))
-#define B_FFXOR(a,b)  _mm_xor_si64((a),(b))
-#define B_FFSH8L(a,n) _mm_slli_si64((a),(n))
-#define B_FFSH8R(a,n) _mm_srli_si64((a),(n))
-
-#define M_EMPTY()     _mm_empty()
-
-
-#undef XOR_8_BY
-#define XOR_8_BY(d,s1,s2)    do { *(__m64*)d = _mm_xor_si64(*(__m64*)(s1), *(__m64*)(s2)); } while(0)
-
-#undef XOREQ_8_BY
-#define XOREQ_8_BY(d,s)      XOR_8_BY(d, d, s)
-
-#undef COPY_8_BY
-#define COPY_8_BY(d,s)       do { *(__m64 *)(d) = *(__m64 *)(s); } while(0)
-
-#undef BEST_SPAN
-#define BEST_SPAN            16
-
-#undef XOR_BEST_BY
-static inline void XOR_BEST_BY(unsigned char *d, unsigned char *s1, unsigned char *s2)
-{
-    __m128 vs1 = _mm_load_ps((float*)s1);
-    __m128 vs2 = _mm_load_ps((float*)s2);
-    vs1 = _mm_xor_ps(vs1, vs2);
-    _mm_store_ps((float*)d, vs1);
-}
-
-#include "fftable.h"
diff --git a/contrib/sasc-ng/FFdecsa/parallel_128_sse2.h b/contrib/sasc-ng/FFdecsa/parallel_128_sse2.h
deleted file mode 100644
index 5a537a9..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_128_sse2.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- *               2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <emmintrin.h>
-
-#define MEMALIGN __attribute__((aligned(16)))
-
-union __u128i {
-	unsigned int u[4];
-	__m128i v;
-};
-
-static const union __u128i ff0 = {{0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U}};
-static const union __u128i ff1 = {{0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU}};
-
-typedef __m128i group;
-#define GROUP_PARALLELISM 128
-#define FF0() ff0.v
-#define FF1() ff1.v
-#define FFAND(a,b) _mm_and_si128((a),(b))
-#define FFOR(a,b)  _mm_or_si128((a),(b))
-#define FFXOR(a,b) _mm_xor_si128((a),(b))
-#define FFNOT(a)   _mm_xor_si128((a),FF1())
-#define MALLOC(X)  _mm_malloc(X,16)
-#define FREE(X)    _mm_free(X)
-
-/* BATCH */
-
-static const union __u128i ff29 = {{0x29292929U, 0x29292929U, 0x29292929U, 0x29292929U}};
-static const union __u128i ff02 = {{0x02020202U, 0x02020202U, 0x02020202U, 0x02020202U}};
-static const union __u128i ff04 = {{0x04040404U, 0x04040404U, 0x04040404U, 0x04040404U}};
-static const union __u128i ff10 = {{0x10101010U, 0x10101010U, 0x10101010U, 0x10101010U}};
-static const union __u128i ff40 = {{0x40404040U, 0x40404040U, 0x40404040U, 0x40404040U}};
-static const union __u128i ff80 = {{0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U}};
-
-typedef __m128i batch;
-#define BYTES_PER_BATCH 16
-#define B_FFN_ALL_29() ff29.v
-#define B_FFN_ALL_02() ff02.v
-#define B_FFN_ALL_04() ff04.v
-#define B_FFN_ALL_10() ff10.v
-#define B_FFN_ALL_40() ff40.v
-#define B_FFN_ALL_80() ff80.v
-
-#define B_FFAND(a,b) FFAND(a,b)
-#define B_FFOR(a,b)  FFOR(a,b)
-#define B_FFXOR(a,b) FFXOR(a,b)
-#define B_FFSH8L(a,n) _mm_slli_epi64((a),(n))
-#define B_FFSH8R(a,n) _mm_srli_epi64((a),(n))
-
-#define M_EMPTY()
-
-#undef BEST_SPAN
-#define BEST_SPAN            16
-
-#undef XOR_BEST_BY
-static inline void XOR_BEST_BY(unsigned char *d, unsigned char *s1, unsigned char *s2)
-{
-	__m128i vs1 = _mm_load_si128((__m128i*)s1);
-	__m128i vs2 = _mm_load_si128((__m128i*)s2);
-	vs1 = _mm_xor_si128(vs1, vs2);
-	_mm_store_si128((__m128i*)d, vs1);
-}
-
-#include "fftable.h"
diff --git a/contrib/sasc-ng/FFdecsa/parallel_generic.h b/contrib/sasc-ng/FFdecsa/parallel_generic.h
deleted file mode 100644
index 2af4c1c..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_generic.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-
-#if 0
-//// generics
-#define COPY4BY(d,s)     do{ int *pd=(int *)(d), *ps=(int *)(s); \
-                             *pd = *ps; }while(0)
-#define COPY8BY(d,s)     do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                             *pd = *ps; }while(0)
-#define COPY16BY(d,s)    do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                             *pd = *ps; \
-			     *(pd+1) = *(ps+1); }while(0)
-#define COPY32BY(d,s)    do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                             *pd = *ps; \
-			     *(pd+1) = *(ps+1) \
-			     *(pd+2) = *(ps+2) \
-			     *(pd+3) = *(ps+3); }while(0)
-#define XOR4BY(d,s1,s2)  do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \
-                             *pd = *ps1  ^ *ps2; }while(0)
-#define XOR8BY(d,s1,s2)  do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
-                             *pd = *ps1  ^ *ps2; }while(0)
-#define XOR16BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
-                             *pd = *ps1  ^ *ps2; \
-                             *(pd+8) = *(ps1+8)  ^ *(ps2+8); }while(0)
-#define XOR32BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
-                             *pd = *ps1  ^ *ps2; \
-                             *(pd+1) = *(ps1+1)  ^ *(ps2+1); \
-                             *(pd+2) = *(ps1+2)  ^ *(ps2+2); \
-                             *(pd+3) = *(ps1+3)  ^ *(ps2+3); }while(0)
-#define XOR32BV(d,s1,s2) do{ int *const pd=(int *const)(d), *ps1=(const int *const)(s1), *ps2=(const int *const)(s2); \
-                             int z; \
-			     for(z=0;z<8;z++){ \
-                               pd[z]=ps1[z]^ps2[z]; \
-			     } \
-                           }while(0)
-#define XOREQ4BY(d,s)    do{ int *pd=(int *)(d), *ps=(int *)(s); \
-                             *pd ^= *ps; }while(0)
-#define XOREQ8BY(d,s)    do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                             *pd ^= *ps; }while(0)
-#define XOREQ16BY(d,s)   do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                             *pd ^= *ps; \
-			     *(pd+1) ^=*(ps+1); }while(0)
-#define XOREQ32BY(d,s)   do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                             *pd ^= *ps; \
-			     *(pd+1) ^=*(ps+1); \
-			     *(pd+2) ^=*(ps+2); \
-			     *(pd+3) ^=*(ps+3); }while(0)
-#define XOREQ32BY4(d,s)  do{ int *pd=(int *)(d), *ps=(int *)(s); \
-                             *pd ^= *ps; \
-			     *(pd+1) ^=*(ps+1); \
-			     *(pd+2) ^=*(ps+2); \
-			     *(pd+3) ^=*(ps+3); \
-			     *(pd+4) ^=*(ps+4); \
-			     *(pd+5) ^=*(ps+5); \
-			     *(pd+6) ^=*(ps+6); \
-			     *(pd+7) ^=*(ps+7); }while(0)
-#define XOREQ32BV(d,s)   do{ unsigned char *pd=(unsigned char *)(d), *ps=(unsigned char *)(s); \
-                             int z; \
-			     for(z=0;z<32;z++){ \
-                               pd[z]^=ps[z]; \
-			     } \
-                           }while(0)
-
-#else
-#define XOR_4_BY(d,s1,s2)    do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \
-                               *pd = *ps1  ^ *ps2; }while(0)
-#define XOR_8_BY(d,s1,s2)    do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
-                               *pd = *ps1  ^ *ps2; }while(0)
-#define XOREQ_4_BY(d,s)      do{ int *pd=(int *)(d), *ps=(int *)(s); \
-                               *pd ^= *ps; }while(0)
-#define XOREQ_8_BY(d,s)      do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                               *pd ^= *ps; }while(0)
-#define COPY_4_BY(d,s)       do{ int *pd=(int *)(d), *ps=(int *)(s); \
-                               *pd = *ps; }while(0)
-#define COPY_8_BY(d,s)       do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
-                               *pd = *ps; }while(0)
-
-#define BEST_SPAN            8
-#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0);
-#define XOREQ_BEST_BY(d,s)   do{ XOREQ_8_BY(d,s); }while(0);
-#define COPY_BEST_BY(d,s)    do{ COPY_8_BY(d,s); }while(0);
-
-#define END_MM             do{ }while(0);
-#endif
diff --git a/contrib/sasc-ng/FFdecsa/parallel_std_def.h b/contrib/sasc-ng/FFdecsa/parallel_std_def.h
deleted file mode 100644
index 10517d4..0000000
--- a/contrib/sasc-ng/FFdecsa/parallel_std_def.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define FFXOR(a,b) ((a)^(b))
-#define FFAND(a,b) ((a)&(b))
-#define FFOR(a,b)  ((a)|(b))
-#define FFNOT(a)   (~(a))
-
-#define B_FFAND(a,b) ((a)&(b))
-#define B_FFOR(a,b)  ((a)|(b))
-#define B_FFXOR(a,b) ((a)^(b))
-#define B_FFSH8L(a,n) ((a)<<(n))
-#define B_FFSH8R(a,n) ((a)>>(n))
diff --git a/contrib/sasc-ng/FFdecsa/stream.c b/contrib/sasc-ng/FFdecsa/stream.c
deleted file mode 100644
index 1bda852..0000000
--- a/contrib/sasc-ng/FFdecsa/stream.c
+++ /dev/null
@@ -1,906 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-
-// define statics only once, when STREAM_INIT
-#ifdef STREAM_INIT
-struct stream_regs {
-  group A[32+10][4]; // 32 because we will move back (virtual shift register)
-  group B[32+10][4]; // 32 because we will move back (virtual shift register)
-  group X[4];
-  group Y[4];
-  group Z[4];
-  group D[4];
-  group E[4];
-  group F[4];
-  group p;
-  group q;
-  group r;
-  };
-
-static inline void trasp64_32_88ccw(unsigned char *data){
-/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define row ((unsigned int *)data)
-  int i,j;
-  for(j=0;j<64;j+=32){
-    unsigned int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff)      | ((b           )<<16);
-      row[j+16+i]=((t           )>>16) |  (b&0xffff0000) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-      row[j+8+i] =((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]   =((t&0x0f0f0f0f)<<4) |  (b&0x0f0f0f0f);
-      row[j+4+i] = (t&0xf0f0f0f0)     | ((b&0xf0f0f0f0)>>4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]   =((t&0x33333333)<<2) |  (b&0x33333333);
-      row[j+2+i] = (t&0xcccccccc)     | ((b&0xcccccccc)>>2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]   =((t&0x55555555)<<1) |  (b&0x55555555);
-      row[j+1+i] = (t&0xaaaaaaaa)     | ((b&0xaaaaaaaa)>>1);
-    }
-  }
-#undef row
-}
-
-static inline void trasp64_32_88cw(unsigned char *data){
-/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define row ((unsigned int *)data)
-  int i,j;
-  for(j=0;j<64;j+=32){
-    unsigned int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff)      | ((b           )<<16);
-      row[j+16+i]=((t           )>>16) |  (b&0xffff0000) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-      row[j+8+i] =((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]  =((t&0xf0f0f0f0)>>4) |   (b&0xf0f0f0f0);
-      row[j+4+i]= (t&0x0f0f0f0f)     |  ((b&0x0f0f0f0f)<<4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]  =((t&0xcccccccc)>>2) |  (b&0xcccccccc);
-      row[j+2+i]= (t&0x33333333)     | ((b&0x33333333)<<2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]  =((t&0xaaaaaaaa)>>1) |  (b&0xaaaaaaaa);
-      row[j+1+i]= (t&0x55555555)     | ((b&0x55555555)<<1);
-    }
-  }
-#undef row
-}
-
-//64-64----------------------------------------------------------
-static inline void trasp64_64_88ccw(unsigned char *data){
-/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define row ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=row[j+i];
-      b=row[j+32+i];
-      row[j+i]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      row[j+32+i]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]   =((t&0x0f0f0f0f0f0f0f0fULL)<<4) |  (b&0x0f0f0f0f0f0f0f0fULL);
-      row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL)     | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]   =((t&0x3333333333333333ULL)<<2) |  (b&0x3333333333333333ULL);
-      row[j+2+i] = (t&0xccccccccccccccccULL)     | ((b&0xccccccccccccccccULL)>>2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]   =((t&0x5555555555555555ULL)<<1) |  (b&0x5555555555555555ULL);
-      row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL)     | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
-    }
-  }
-#undef row
-}
-
-static inline void trasp64_64_88cw(unsigned char *data){
-/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define row ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=row[j+i];
-      b=row[j+32+i];
-      row[j+i]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      row[j+32+i]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]   =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) |   (b&0xf0f0f0f0f0f0f0f0ULL);
-      row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL)     |  ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]   =((t&0xccccccccccccccccULL)>>2) |  (b&0xccccccccccccccccULL);
-      row[j+2+i] = (t&0x3333333333333333ULL)     | ((b&0x3333333333333333ULL)<<2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]   =((t&0xaaaaaaaaaaaaaaaaULL)>>1) |  (b&0xaaaaaaaaaaaaaaaaULL);
-      row[j+1+i] = (t&0x5555555555555555ULL)     | ((b&0x5555555555555555ULL)<<1);
-    }
-  }
-#undef row
-}
-
-//64-128----------------------------------------------------------
-static inline void trasp64_128_88ccw(unsigned char *data){
-/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define halfrow ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+32+i)];
-      halfrow[2*(j+i)]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+32+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)+1]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+16+i)];
-      halfrow[2*(j+i)]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+16+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+8+i)];
-      halfrow[2*(j+i)]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+8+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+4+i)];
-      halfrow[2*(j+i)]   =((t&0x0f0f0f0f0f0f0f0fULL)<<4) |  (b&0x0f0f0f0f0f0f0f0fULL);
-      halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL)     | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+4+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0x0f0f0f0f0f0f0f0fULL)<<4) |  (b&0x0f0f0f0f0f0f0f0fULL);
-      halfrow[2*(j+4+i)+1] = (t&0xf0f0f0f0f0f0f0f0ULL)     | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+2+i)];
-      halfrow[2*(j+i)]   =((t&0x3333333333333333ULL)<<2) |  (b&0x3333333333333333ULL);
-      halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL)     | ((b&0xccccccccccccccccULL)>>2);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+2+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0x3333333333333333ULL)<<2) |  (b&0x3333333333333333ULL);
-      halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL)     | ((b&0xccccccccccccccccULL)>>2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+1+i)];
-      halfrow[2*(j+i)]   =((t&0x5555555555555555ULL)<<1) |  (b&0x5555555555555555ULL);
-      halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL)     | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+1+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0x5555555555555555ULL)<<1) |  (b&0x5555555555555555ULL);
-      halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL)     | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
-    }
-  }
-#undef halfrow
-}
-
-static inline void trasp64_128_88cw(unsigned char *data){
-/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define halfrow ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+32+i)];
-      halfrow[2*(j+i)]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+32+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)+1]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+16+i)];
-      halfrow[2*(j+i)]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+16+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+8+i)];
-      halfrow[2*(j+i)]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+8+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+4+i)];
-      halfrow[2*(j+i)]   =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) |   (b&0xf0f0f0f0f0f0f0f0ULL);
-      halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL)     |  ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+4+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) |   (b&0xf0f0f0f0f0f0f0f0ULL);
-      halfrow[2*(j+4+i)+1] = (t&0x0f0f0f0f0f0f0f0fULL)     |  ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+2+i)];
-      halfrow[2*(j+i)]   =((t&0xccccccccccccccccULL)>>2) |  (b&0xccccccccccccccccULL);
-      halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL)     | ((b&0x3333333333333333ULL)<<2);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+2+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0xccccccccccccccccULL)>>2) |  (b&0xccccccccccccccccULL);
-      halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL)     | ((b&0x3333333333333333ULL)<<2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+1+i)];
-      halfrow[2*(j+i)]   =((t&0xaaaaaaaaaaaaaaaaULL)>>1) |  (b&0xaaaaaaaaaaaaaaaaULL);
-      halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL)     | ((b&0x5555555555555555ULL)<<1);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+1+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0xaaaaaaaaaaaaaaaaULL)>>1) |  (b&0xaaaaaaaaaaaaaaaaULL);
-      halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL)     | ((b&0x5555555555555555ULL)<<1);
-    }
-  }
-#undef halfrow
-}
-#endif
-
-
-#ifdef STREAM_INIT
-void stream_cypher_group_init(
-  struct stream_regs *regs,
-  group         iA[8][4], // [In]  iA00,iA01,...iA73 32 groups  | Derived from key.
-  group         iB[8][4], // [In]  iB00,iB01,...iB73 32 groups  | Derived from key.
-  unsigned char *sb)      // [In]  (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
-#endif
-#ifdef STREAM_NORMAL
-void stream_cypher_group_normal(
-  struct stream_regs *regs,
-  unsigned char *cb)    // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
-#endif
-{
-#ifdef STREAM_INIT
-  group in1[4];
-  group in2[4];
-#endif
-  group extra_B[4];
-  group fa,fb,fc,fd,fe;
-  group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
-  group next_E[4];
-  group tmp0,tmp1,tmp2,tmp3,tmp4;
-#ifdef STREAM_INIT
-  group *sb_g=(group *)sb;
-#endif
-#ifdef STREAM_NORMAL
-  group *cb_g=(group *)cb;
-#endif
-  int aboff;
-  int i,j,k,b;
-  int dbg;
-
-#ifdef STREAM_INIT
-  DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n"));
-#endif
-#ifdef STREAM_NORMAL
-  DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n"));
-#endif
-#ifdef STREAM_INIT
-for(j=0;j<64;j++){
-  DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j));
-  DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
-}
-
-DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG));
-#if GROUP_PARALLELISM==32
-trasp64_32_88ccw(sb);
-#endif
-#if GROUP_PARALLELISM==64
-trasp64_64_88ccw(sb);
-#endif
-#if GROUP_PARALLELISM==128
-trasp64_128_88ccw(sb);
-#endif
-DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG));
-
-for(j=0;j<64;j++){
-  DBG(fprintf(stderr,"precall stream_in[%2i]=",j));
-  DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
-}
-#endif
-
-  aboff=32;
-
-#ifdef STREAM_INIT
-  // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
-  // load last  32 bits of ck into B[aboff+0]..B[aboff+7]
-  // all other regs = 0
-  for(i=0;i<8;i++){
-    for(b=0;b<4;b++){
-DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b));
-DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG));
-DBG(fprintf(stderr,"                                       dbg from iB B[%i][%i]=",i,b));
-DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG));
-      regs->A[aboff+i][b]=iA[i][b];
-      regs->B[aboff+i][b]=iB[i][b];
-    }
-  }
-  for(b=0;b<4;b++){
-    regs->A[aboff+8][b]=FF0();
-    regs->A[aboff+9][b]=FF0();
-    regs->B[aboff+8][b]=FF0();
-    regs->B[aboff+9][b]=FF0();
-  }
-  for(b=0;b<4;b++){
-    regs->X[b]=FF0();
-    regs->Y[b]=FF0();
-    regs->Z[b]=FF0();
-    regs->D[b]=FF0();
-    regs->E[b]=FF0();
-    regs->F[b]=FF0();
-  }
-  regs->p=FF0();
-  regs->q=FF0();
-  regs->r=FF0();
-#endif
-
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"dbg A0[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG));
-  DBG(fprintf(stderr,"dbg B0[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-  // EXTERNAL LOOP - 8 bytes per operation
-  for(i=0;i<8;i++){
-
-    DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i));
-
-#ifdef STREAM_INIT
-    for(b=0;b<4;b++){
-      in1[b]=sb_g[8*i+4+b];
-      in2[b]=sb_g[8*i+b];
-    }
-#endif
-
-    // INTERNAL LOOP - 2 bits per iteration
-    for(j=0; j<4; j++){
-
-      DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j));
-
-      // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
-      // 5 bits input per s-box, 2 bits output per s-box
-
-      // we can select bits with zero masking and shifting operations
-      // and synthetize s-boxes with optimized boolean functions.
-      // this is the actual reason we do all the crazy transposition
-      // stuff to switch between normal and bit slice representations.
-      // this code really flies.
-
-      fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0];
-/* 1000 1110  1110 0001   : lev  7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
-/* 1110 0010  0011 0011   : lev  6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
-/* 0011 0110  1000 1101   : lev  5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
-/* 0101 0101  1001 0011   : lev  5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
-/* 1000 1110  1110 0001   : lev  7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
-/* 1110 0010  0011 0011   : lev  6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
-/* 0011 0110  1000 1101   : lev  5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
-/* 0101 0101  1001 0011   : lev  5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
-      s1a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s1b=FFXOR(tmp2,FFAND(fe,tmp3));
-//dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
-//dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
-//dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
-//dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
-//dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
-
-      fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1];
-/* 1001 1110  0110 0001   : lev  6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
-/* 0000 0011  0111 1011   : lev  5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
-/* 1100 0110  1101 0010   : lev  6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
-/* 0001 1110  1111 0101   : lev  5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
-/* 1001 1110  0110 0001   : lev  6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
-/* 0000 0011  0111 1011   : lev  5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
-/* 1100 0110  1101 0010   : lev  6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
-/* 0001 1110  1111 0101   : lev  5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
-      s2a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s2b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2];
-/* 0100 1011  1001 0110   : lev  5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
-/* 1101 0101  1000 1100   : lev  7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
-/* 0010 0111  1101 1000   : lev  4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
-/* 1111 1111  1111 1111   : lev  0: */ //tmp3=ALL_ONES;
-/* 0100 1011  1001 0110   : lev  5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
-/* 1101 0101  1000 1100   : lev  7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
-/* 0010 0111  1101 1000   : lev  4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
-/* 1111 1111  1111 1111   : lev  0: */ tmp3=FF1();
-      s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
-      s3b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0];
-/* 1011 0101  0100 1001   : lev  7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
-/* 0010 1101  0110 0110   : lev  6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
-/* 0110 0111  1101 0000   : lev  7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
-/* 1111 1111  1111 1111   : lev  0: */ //tmp3=ALL_ONES;
-/* 1011 0101  0100 1001   : lev  7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
-/* 0010 1101  0110 0110   : lev  6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
-/* 0110 0111  1101 0000   : lev  7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
-/* 1111 1111  1111 1111   : lev  0: */ tmp3=FF1();
-      s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
-      s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
-
-      fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2];
-/* 1000 1111  0011 0010   : lev  7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
-/* 0110 1011  0000 1011   : lev  6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
-/* 0001 1010  0111 1001   : lev  6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
-/* 0101 1101  1101 0101   : lev  4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
-/* 1000 1111  0011 0010   : lev  7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
-/* 0110 1011  0000 1011   : lev  6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
-/* 0001 1010  0111 1001   : lev  6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
-/* 0101 1101  1101 0101   : lev  4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
-      s5a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s5b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3];
-/* 0011 0110  0010 1101   : lev  6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
-/* 1110 1110  1011 1011   : lev  3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
-/* 0101 1000  0110 0111   : lev  6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
-/* 0001 0011  0000 0001   : lev  5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
-/* 0011 0110  0010 1101   : lev  6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
-/* 1110 1110  1011 1011   : lev  3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
-/* 0101 1000  0110 0111   : lev  6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
-/* 0001 0011  0000 0001   : lev  5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
-      s6a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s6b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3];
-/* 0111 1000  1001 0110   : lev  5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
-/* 0100 1001  0101 1011   : lev  6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
-/* 0100 1001  1011 1001   : lev  5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
-/* 1111 1111  1101 1101   : lev  3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
-/* 0111 1000  1001 0110   : lev  5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
-/* 0100 1001  0101 1011   : lev  6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
-/* 0100 1001  1011 1001   : lev  5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
-/* 1111 1111  1101 1101   : lev  3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
-      s7a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s7b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-
-/*
-      we have just done this:
-      
-      int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
-      int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
-      int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
-      int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
-      int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
-      int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
-      int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
-
-      s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
-           |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
-      s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
-           |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
-      s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
-           |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
-      s7 =  sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
-*/
-
-      // use 4x4 xor to produce extra nibble for T3
-
-      extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]);
-      extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]);
-      extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]);
-      extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]);
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"extra_B[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG));
-}
-
-      // T1 = xor all inputs
-      // in1, in2, D are only used in T1 during initialisation, not generation
-      for(b=0;b<4;b++){
-        regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]);
-      }
-
-#ifdef STREAM_INIT
-      for(b=0;b<4;b++){
-        regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b]));
-      }
-#endif
-
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"next_A0[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG));
-}
-
-      // T2 =  xor all inputs
-      // in1, in2 are only used in T1 during initialisation, not generation
-      // if p=0, use this, if p=1, rotate the result left
-      for(b=0;b<4;b++){
-        regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]);
-      }
-
-#ifdef STREAM_INIT
-      for(b=0;b<4;b++){
-        regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
-      }
-#endif
-
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"next_B0[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
-}
-
-      // if p=1, rotate left (yes, this is what we're doing)
-      tmp3=regs->B[aboff-1][3];
-      regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p));
-      regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p));
-      regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p));
-      regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p));
-
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"next_B0[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
-}
-
-      // T3 = xor all inputs
-      for(b=0;b<4;b++){
-        regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]);
-      }
-
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"D[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->D[dbg],BYPG,BYPG));
-}
-
-      // T4 = sum, carry of Z + E + r
-      for(b=0;b<4;b++){
-        next_E[b]=regs->F[b];
-      }
-
-      tmp0=FFXOR(regs->Z[0],regs->E[0]);
-      tmp1=FFAND(regs->Z[0],regs->E[0]);
-      regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r)));
-      tmp3=FFAND(tmp0,regs->r);
-      tmp4=FFOR(tmp1,tmp3);
-
-      tmp0=FFXOR(regs->Z[1],regs->E[1]);
-      tmp1=FFAND(regs->Z[1],regs->E[1]);
-      regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4)));
-      tmp3=FFAND(tmp0,tmp4);
-      tmp4=FFOR(tmp1,tmp3);
-
-      tmp0=FFXOR(regs->Z[2],regs->E[2]);
-      tmp1=FFAND(regs->Z[2],regs->E[2]);
-      regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4)));
-      tmp3=FFAND(tmp0,tmp4);
-      tmp4=FFOR(tmp1,tmp3);
-
-      tmp0=FFXOR(regs->Z[3],regs->E[3]);
-      tmp1=FFAND(regs->Z[3],regs->E[3]);
-      regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4)));
-      tmp3=FFAND(tmp0,tmp4);
-      regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // ultimate carry
-
-/*
-      we have just done this: (believe it or not)
-      
-      if (q) {
-        F = Z + E + r;
-        r = (F >> 4) & 1;
-        F = F & 0x0f;
-      }
-      else {
-          F = E;
-      }
-*/
-      for(b=0;b<4;b++){
-        regs->E[b]=next_E[b];
-      }
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"F[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->F[dbg],BYPG,BYPG));
-}
-DBG(fprintf(stderr,"r="));
-DBG(dump_mem("",(unsigned char *)&regs->r,BYPG,BYPG));
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"E[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->E[dbg],BYPG,BYPG));
-}
-
-      // this simple instruction is virtually shifting all the shift registers
-      aboff--;
-
-/*
-      we've just done this:
-
-      A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
-      B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
-*/
-
-      regs->X[0]=s1a;
-      regs->X[1]=s2a;
-      regs->X[2]=s3b;
-      regs->X[3]=s4b;
-      regs->Y[0]=s3a;
-      regs->Y[1]=s4a;
-      regs->Y[2]=s5b;
-      regs->Y[3]=s6b;
-      regs->Z[0]=s5a;
-      regs->Z[1]=s6a;
-      regs->Z[2]=s1b;
-      regs->Z[3]=s2b;
-      regs->p=s7a;
-      regs->q=s7b;
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"X[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->X[dbg],BYPG,BYPG));
-}
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"Y[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->Y[dbg],BYPG,BYPG));
-}
-for(dbg=0;dbg<4;dbg++){
-  DBG(fprintf(stderr,"Z[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&regs->Z[dbg],BYPG,BYPG));
-}
-DBG(fprintf(stderr,"p="));
-DBG(dump_mem("",(unsigned char *)&regs->p,BYPG,BYPG));
-DBG(fprintf(stderr,"q="));
-DBG(dump_mem("",(unsigned char *)&regs->q,BYPG,BYPG));
-
-#ifdef STREAM_NORMAL
-      // require 4 loops per output byte
-      // 2 output bits are a function of the 4 bits of D
-      // xor 2 by 2
-      cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]);
-      cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]);
-for(dbg=0;dbg<8;dbg++){
-  DBG(fprintf(stderr,"op[%i]=",dbg));
-  DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG));
-}
-#endif
-
-DBG(fprintf(stderr,"---END INTERNAL LOOP\n"));
-
-    } // INTERNAL LOOP
-
-DBG(fprintf(stderr,"--END EXTERNAL LOOP\n"));
-
-  } // EXTERNAL LOOP
-
-  // move 32 steps forward, ready for next call
-  for(k=0;k<10;k++){
-    for(b=0;b<4;b++){
-DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b));
-      regs->A[32+k][b]=regs->A[k][b];
-      regs->B[32+k][b]=regs->B[k][b];
-    }
-  }
-
-
-////////////////////////////////////////////////////////////////////////////////
-
-#ifdef STREAM_NORMAL
-for(j=0;j<64;j++){
-  DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j));
-  DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
-}
-
-#if GROUP_PARALLELISM==32
-trasp64_32_88cw(cb);
-#endif
-#if GROUP_PARALLELISM==64
-trasp64_64_88cw(cb);
-#endif
-#if GROUP_PARALLELISM==128
-trasp64_128_88cw(cb);
-#endif
-
-for(j=0;j<64;j++){
-  DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j));
-  DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
-}
-#endif
-
-#ifdef STREAM_INIT
-  DBG(fprintf(stderr,":::::::::: END STREAM INIT\n"));
-#endif
-#ifdef STREAM_NORMAL
-  DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n"));
-#endif
-
-}
-
diff --git a/contrib/sasc-ng/FFdecsa/tmp_autogenerated_stuff_FFdecsa.c b/contrib/sasc-ng/FFdecsa/tmp_autogenerated_stuff_FFdecsa.c
deleted file mode 100644
index 3c7788b..0000000
--- a/contrib/sasc-ng/FFdecsa/tmp_autogenerated_stuff_FFdecsa.c
+++ /dev/null
@@ -1,790 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#include <sys/types.h>
-#include <string.h>
-#include <stdio.h>
-
-#include "FFdecsa.h"
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-// activate debug by changing the grep command there.
-// don't edit autogenerated files (name beginning with "_").
-
-//// parallelization stuff, large speed differences are possible
-// possible choices
-#define PARALLEL_32_4CHAR     320
-#define PARALLEL_32_4CHARA    321
-#define PARALLEL_32_INT       322
-#define PARALLEL_64_8CHAR     640
-#define PARALLEL_64_8CHARA    641
-#define PARALLEL_64_2INT      642
-#define PARALLEL_64_LONG      643
-#define PARALLEL_64_MMX       644
-#define PARALLEL_128_16CHAR  1280
-#define PARALLEL_128_16CHARA 1281
-#define PARALLEL_128_4INT    1282
-#define PARALLEL_128_2LONG   1283
-#define PARALLEL_128_2MMX    1284
-#define PARALLEL_128_SSE     1285
-
-//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
-#define PARALLEL_MODE PARALLEL_64_MMX
-//////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
-
-#include "parallel_generic.h"
-//// conditionals
-#if PARALLEL_MODE==PARALLEL_32_4CHAR
-#include "parallel_032_4char.h"
-#elif PARALLEL_MODE==PARALLEL_32_4CHARA
-#include "parallel_032_4charA.h"
-#elif PARALLEL_MODE==PARALLEL_32_INT
-#include "parallel_032_int.h"
-#elif PARALLEL_MODE==PARALLEL_64_8CHAR
-#include "parallel_064_8char.h"
-#elif PARALLEL_MODE==PARALLEL_64_8CHARA
-#include "parallel_064_8charA.h"
-#elif PARALLEL_MODE==PARALLEL_64_2INT
-#include "parallel_064_2int.h"
-#elif PARALLEL_MODE==PARALLEL_64_LONG
-#include "parallel_064_long.h"
-#elif PARALLEL_MODE==PARALLEL_64_MMX
-#include "parallel_064_mmx.h"
-#elif PARALLEL_MODE==PARALLEL_128_16CHAR
-#include "parallel_128_16char.h"
-#elif PARALLEL_MODE==PARALLEL_128_16CHARA
-#include "parallel_128_16charA.h"
-#elif PARALLEL_MODE==PARALLEL_128_4INT
-#include "parallel_128_4int.h"
-#elif PARALLEL_MODE==PARALLEL_128_2LONG
-#include "parallel_128_2long.h"
-#elif PARALLEL_MODE==PARALLEL_128_2MMX
-#include "parallel_128_2mmx.h"
-#elif PARALLEL_MODE==PARALLEL_128_SSE
-#include "parallel_128_sse.h"
-#else
-#error "unknown/undefined parallel mode"
-#endif
-
-// stuff depending on conditionals
-
-#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
-#define BYPG BYTES_PER_GROUP
-#define BITS_PER_GROUP GROUP_PARALLELISM
-#define BIPG BITS_PER_GROUP
-
-
-//// debug tool
-
-static void dump_mem(unsigned char *string, unsigned char *p, int len, int linelen){
-  int i;
-  for(i=0;i<len;i++){
-    if(i%linelen==0&&i) fprintf(stderr,"\n");
-    if(i%linelen==0) fprintf(stderr,"%s %08x:",string,i);
-    else{
-      if(i%8==0) fprintf(stderr," ");
-      if(i%4==0) fprintf(stderr," ");
-    }
-    fprintf(stderr," %02x",p[i]);
-  }
-  if(i%linelen==0) fprintf(stderr,"\n");
-}
-
-//////////////////////////////////////////////////////////////////////////////////
-
-struct csa_key_t{
-	unsigned char ck[8];
-// used by stream
-        int iA[8];  // iA[0] is for A1, iA[7] is for A8
-        int iB[8];  // iB[0] is for B1, iB[7] is for B8
-// used by stream (group)
-        group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
-        group iA_g[8][4]; // [0 for A1][0 for LSB]
-        group iB_g[8][4]; // [0 for B1][0 for LSB]
-// used by block
-	unsigned char kk[56];
-// used by block (group)
-	__attribute__((aligned(16))) batch kkmulti[56]; // many times the same byte in every batch
-};
-
-static struct csa_keys_t{
-  struct csa_key_t even;
-  struct csa_key_t odd;
-} keys;
-
-
-//-----stream cypher
-
-//-----key schedule for stream decypher
-static void key_schedule_stream(
-  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes   | Key.
-  int *iA,              // [Out] iA[0]-iA[7]   8 nibbles | Key schedule.
-  int *iB)              // [Out] iB[0]-iB[7]   8 nibbles | Key schedule.
-{
-    iA[0]=(ck[0]>>4)&0xf;
-    iA[1]=(ck[0]   )&0xf;
-    iA[2]=(ck[1]>>4)&0xf;
-    iA[3]=(ck[1]   )&0xf;
-    iA[4]=(ck[2]>>4)&0xf;
-    iA[5]=(ck[2]   )&0xf;
-    iA[6]=(ck[3]>>4)&0xf;
-    iA[7]=(ck[3]   )&0xf;
-    iB[0]=(ck[4]>>4)&0xf;
-    iB[1]=(ck[4]   )&0xf;
-    iB[2]=(ck[5]>>4)&0xf;
-    iB[3]=(ck[5]   )&0xf;
-    iB[4]=(ck[6]>>4)&0xf;
-    iB[5]=(ck[6]   )&0xf;
-    iB[6]=(ck[7]>>4)&0xf;
-    iB[7]=(ck[7]   )&0xf;
-}
-
-//----- stream main function
-
-#define STREAM_INIT
-#include "tmp_autogenerated_stuff_stream.c"
-#undef STREAM_INIT
-
-#define STREAM_NORMAL
-#include "tmp_autogenerated_stuff_stream.c"
-#undef STREAM_NORMAL
-
-
-//-----block decypher
-
-//-----key schedule for block decypher
-
-static void key_schedule_block(
-  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes | Key.
-  unsigned char *kk)    // [Out] kk[0]-kk[55] 56 bytes | Key schedule.
-{
-  static const unsigned char key_perm[0x40] = {
-    0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
-    0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
-    0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
-    0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
-  };
-
-  int i,j,k;
-  int bit[64];
-  int newbit[64];
-  int kb[7][8];
-
-  // 56 steps
-  // 56 key bytes kk(55)..kk(0) by key schedule from ck
-
-  // kb(6,0) .. kb(6,7) = ck(0) .. ck(7)
-  kb[6][0] = ck[0];
-  kb[6][1] = ck[1];
-  kb[6][2] = ck[2];
-  kb[6][3] = ck[3];
-  kb[6][4] = ck[4];
-  kb[6][5] = ck[5];
-  kb[6][6] = ck[6];
-  kb[6][7] = ck[7];
-
-  // calculate kb[5] .. kb[0]
-  for(i=5; i>=0; i--){
-    // 64 bit perm on kb
-    for(j=0; j<8; j++){
-      for(k=0; k<8; k++){
-        bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1;
-        newbit[key_perm[j*8+k]-1] = bit[j*8+k];
-      }
-    }
-    for(j=0; j<8; j++){
-      kb[i][j] = 0;
-      for(k=0; k<8; k++){
-        kb[i][j] |= newbit[j*8+k] << (7-k);
-      }
-    }
-  }
-
-  // xor to give kk
-  for(i=0; i<7; i++){
-    for(j=0; j<8; j++){
-      kk[i*8+j] = kb[i][j] ^ i;
-    }
-  }
-
-}
-
-//-----block utils
-
-static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
-  int *ri=(int *)in;
-  int *ibi=(int *)out;
-  int j,i,k,g;
-  // copy and first step
-  for(g=0;g<count;g++){
-    ri[g]=ibi[2*g];
-    ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
-  }
-//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01230123
-#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
-  for(j=0;j<8;j+=4){
-    for(i=0;i<2;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+2)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x0000ffff)      | ((b           )<<16);
-        ri[INTS_PER_ROW*(j+i+2)+k]=  ((t           )>>16) |  (b&0xffff0000) ;
-      }
-    }
-  }
-//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01010101
-  for(j=0;j<8;j+=2){
-    for(i=0;i<1;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+1)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-        ri[INTS_PER_ROW*(j+i+1)+k]=  ((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-      }
-    }
-  }
-//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 00000000
-}
-
-static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
-  int *ri=(int *)in;
-  int *bdi=(int *)out;
-  int j,i,k,g;
-#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
-//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 00000000
-  for(j=0;j<8;j+=2){
-    for(i=0;i<1;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+1)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-        ri[INTS_PER_ROW*(j+i+1)+k]=  ((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-      }
-    }
-  }
-//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01010101
-  for(j=0;j<8;j+=4){
-    for(i=0;i<2;i++){
-      for(k=0;k<INTS_PER_ROW;k++){
-        unsigned int t,b;
-        t=ri[INTS_PER_ROW*(j+i)+k];
-        b=ri[INTS_PER_ROW*(j+i+2)+k];
-        ri[INTS_PER_ROW*(j+i)+k]=     (t&0x0000ffff)      | ((b           )<<16);
-        ri[INTS_PER_ROW*(j+i+2)+k]=  ((t           )>>16) |  (b&0xffff0000) ;
-      }
-    }
-  }
-//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01230123
-  for(g=0;g<count;g++){
-    bdi[2*g]=ri[g];
-    bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
-  }
-}
-
-//-----block main function
-
-// block group
-static void block_decypher_group(
-  batch *kkmulti,       // [In]  kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
-  unsigned char *ib,    // [In]  (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
-  unsigned char *bd,    // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
-  int count)
-{
-  // int is faster than unsigned char. apparently not
-  static const unsigned char block_sbox[0x100] = {
-    0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
-    0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
-    0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
-    0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
-    0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
-    0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
-    0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
-    0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
-
-    0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
-    0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
-    0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
-    0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
-    0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
-    0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
-    0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
-    0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
-  };
-  unsigned char r[GROUP_PARALLELISM*(8+56)];  /* 56 because we will move back in memory while looping */
-  unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
-  int roff;
-  int i,g,count_all=GROUP_PARALLELISM;
-
-  roff=GROUP_PARALLELISM*56;
-
-#define FASTTRASP1
-#ifndef FASTTRASP1
-  for(g=0;g<count;g++){
-    // Init registers 
-    int j;
-    for(j=0;j<8;j++){
-      r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
-    }
-  }
-#else
-  trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
-#endif
-//dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-
-  // loop over kk[55]..kk[0]
-  for(i=55;i>=0;i--){
-    {
-      batch tkkmulti=kkmulti[i];
-      batch *si=(batch *)sbox_in;
-      batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
-      for(g=0;g<count_all/BYTES_PER_BATCH;g++){
-        si[g]=B_FFXOR(tkkmulti,r6_N[g]);              //FIXME: introduce FASTBATCH?
-      }
-    }
-
-    // table lookup, this works on only one byte at a time
-    // most difficult part of all
-    // - can't be parallelized
-    // - can't be synthetized through boolean terms (8 input bits are too many)
-    for(g=0;g<count_all;g++){
-      sbox_out[g]=block_sbox[sbox_in[g]];
-    }
-
-    // bit permutation
-    {
-      unsigned char *po=(unsigned char *)perm_out;
-      unsigned char *so=(unsigned char *)sbox_out;
-//dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
-      for(g=0;g<count_all;g+=BYTES_PER_BATCH){
-        batch in,out;
-        in=*(batch *)&so[g];
-
-        out=B_FFOR(
-	    B_FFOR(
-	    B_FFOR(
-	    B_FFOR(
-	    B_FFOR(
-	           B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
-	           B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
-	           B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
-	           B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
-	           B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
-	           B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
-
-        *(batch *)&po[g]=out;
-      }
-//dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
-    }
-
-    roff-=GROUP_PARALLELISM; /* virtual shift of registers */
-
-#if 0
-/* one by one */
-    for(g=0;g<count_all;g++){
-      r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
-      r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
-      r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
-      r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
-      r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
-    }
-#else
-    for(g=0;g<count_all;g+=BEST_SPAN){
-      XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
-      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
-    }
-#endif
-  }
-
-#define FASTTRASP2
-#ifndef FASTTRASP2
-  for(g=0;g<count;g++){
-    // Copy results
-    int j;
-    for(j=0;j<8;j++){
-      bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
-    }
-  }
-#else
-  trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
-#endif
-}
-
-//-----------------------------------EXTERNAL INTERFACE
-
-//-----get internal parallelism
-
-int get_internal_parallelism(void){
-  return GROUP_PARALLELISM;
-}
-
-//-----get suggested cluster size
-
-int get_suggested_cluster_size(void){
-  int r;
-  r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
-  if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
-  return r;
-}
-
-//-----set control words
-
-void set_control_words(unsigned char *ev, unsigned char *od){
-  // could be made faster, but is not run often
-  int bi,by;
-  int i,j;
-// key
-  memcpy(keys.even.ck,ev,8);
-  memcpy(keys.odd.ck,od,8);
-// precalculations for stream
-  key_schedule_stream(keys.even.ck,keys.even.iA,keys.even.iB);
-  key_schedule_stream(keys.odd.ck,keys.odd.iA,keys.odd.iB);
-  for(by=0;by<8;by++){
-    for(bi=0;bi<8;bi++){
-      keys.even.ck_g[by][bi]=(keys.even.ck[by]&(1<<bi))?FF1():FF0();
-      keys.odd.ck_g[by][bi]=(keys.odd.ck[by]&(1<<bi))?FF1():FF0();
-    }
-  }
-  for(by=0;by<8;by++){
-    for(bi=0;bi<4;bi++){
-      keys.even.iA_g[by][bi]=(keys.even.iA[by]&(1<<bi))?FF1():FF0();
-      keys.odd.iA_g[by][bi]=(keys.odd.iA[by]&(1<<bi))?FF1():FF0();
-      keys.even.iB_g[by][bi]=(keys.even.iB[by]&(1<<bi))?FF1():FF0();
-      keys.odd.iB_g[by][bi]=(keys.odd.iB[by]&(1<<bi))?FF1():FF0();
-    }
-  }
-// precalculations for block
-  key_schedule_block(keys.even.ck,keys.even.kk);
-  key_schedule_block(keys.odd.ck,keys.odd.kk);
-  for(i=0;i<56;i++){
-    for(j=0;j<BYTES_PER_BATCH;j++){
-      *(((unsigned char *)&keys.even.kkmulti[i])+j)=keys.even.kk[i];
-      *(((unsigned char *)&keys.odd.kkmulti[i])+j)=keys.odd.kk[i];
-    }
-  }
-}
-
-//-----get control words
-
-void get_control_words(unsigned char *even, unsigned char *odd){
-  memcpy(even,keys.even.ck,8);
-  memcpy(odd,keys.odd.ck,8);
-}
-
-//----- decrypt
-
-int decrypt_packets(unsigned char **cluster){
-  // statistics, currently unused
-  int stat_no_scramble=0;
-  int stat_reserved=0;
-  int stat_decrypted[2]={0,0};
-  int stat_decrypted_mini=0;
-  unsigned char **clst;
-  unsigned char **clst2;
-  int grouped;
-  int group_ev_od;
-  int advanced;
-  int can_advance;
-  unsigned char *g_pkt[GROUP_PARALLELISM];
-  int g_len[GROUP_PARALLELISM];
-  int g_offset[GROUP_PARALLELISM];
-  int g_n[GROUP_PARALLELISM];
-  int g_residue[GROUP_PARALLELISM];
-  unsigned char *pkt;
-  int xc0,ev_od,len,offset,n,residue;
-  struct csa_key_t* k;
-  int i,j,iter,g;
-  int t23,tsmall;
-  int alive[24];
-//icc craziness  int pad1=0; //////////align! FIXME
-  unsigned char *encp[GROUP_PARALLELISM];
-  unsigned char stream_in[GROUP_PARALLELISM*8];
-  unsigned char stream_out[GROUP_PARALLELISM*8];
-  unsigned char ib[GROUP_PARALLELISM*8];
-  unsigned char block_out[GROUP_PARALLELISM*8];
-
-//icc craziness  i=(int)&pad1;//////////align!!! FIXME
-
-  // build a list of packets to be processed
-  clst=cluster;
-  grouped=0;
-  advanced=0;
-  can_advance=1;
-  group_ev_od=-1; // silence incorrect compiler warning
-  pkt=*clst;
-  do{ // find a new packet
-    if(grouped==GROUP_PARALLELISM){
-      // full
-      break;
-    }
-    if(pkt==NULL){
-      // no more ranges
-      break;
-    }
-    if(pkt>=*(clst+1)){
-      // out of this range, try next
-      clst++;clst++;
-      pkt=*clst;
-      continue;
-    }
-
-    do{ // handle this packet
-      xc0=pkt[3]&0xc0;
-      if(xc0==0x00){
-        advanced+=can_advance;
-        stat_no_scramble++;
-        break;
-      }
-      if(xc0==0x40){
-        advanced+=can_advance;
-        stat_reserved++;
-        break;
-      }
-      if(xc0==0x80||xc0==0xc0){ // encrypted
-        ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
-        if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
-        if(group_ev_od==ev_od){ // could be added to group
-          pkt[3]&=0x3f;  // consider it decrypted now
-          if(pkt[3]&0x20){ // incomplete packet
-            offset=4+pkt[4]+1;
-            len=188-offset;
-            n=len>>3;
-            residue=len-(n<<3);
-            if(n==0){ // decrypted==encrypted!
-              advanced+=can_advance;
-              stat_decrypted_mini++;
-              break; // this doesn't need more processing
-            }
-          }else{
-            len=184;
-            offset=4;
-            n=23;
-            residue=0;
-          }
-          g_pkt[grouped]=pkt;
-          g_len[grouped]=len;
-          g_offset[grouped]=offset;
-          g_n[grouped]=n;
-          g_residue[grouped]=residue;
-          grouped++;
-          advanced+=can_advance;
-          stat_decrypted[ev_od]++;
-        }
-        else{
-          can_advance=0;
-          break; // skip and go on
-        }
-      }
-    } while(0);
-
-    if(can_advance){
-      // move range start forward
-      *clst+=188;
-    }
-    // next packet, if there is one
-    pkt+=188;
-  } while(1);
-
-  // delete empty ranges and compact list
-  clst2=cluster;
-  for(clst=cluster;*clst!=NULL;clst+=2){
-    // if not empty
-    if(*clst<*(clst+1)){
-      // it will remain 
-      *clst2=*clst;
-      *(clst2+1)=*(clst+1);
-      clst2+=2;
-    }
-  }
-  *clst2=NULL;
-
-  if(grouped==0){
-    // no processing needed
-    return advanced;
-  }
-
-  //  sort them, longest payload first
-  //  we expect many n=23 packets and a few n<23
-  // grouped is always <= GROUP_PARALLELISM
-
-#define g_swap(a,b) \
-    pkt=g_pkt[a]; \
-    g_pkt[a]=g_pkt[b]; \
-    g_pkt[b]=pkt; \
-\
-    len=g_len[a]; \
-    g_len[a]=g_len[b]; \
-    g_len[b]=len; \
-\
-    offset=g_offset[a]; \
-    g_offset[a]=g_offset[b]; \
-    g_offset[b]=offset; \
-\
-    n=g_n[a]; \
-    g_n[a]=g_n[b]; \
-    g_n[b]=n; \
-\
-    residue=g_residue[a]; \
-    g_residue[a]=g_residue[b]; \
-    g_residue[b]=residue;
-
-  // step 1: move n=23 packets before small packets
-  t23=0;
-  tsmall=grouped-1;
-  for(;;){
-    for(;t23<grouped;t23++){
-      if(g_n[t23]!=23) break;
-    }
-    
-    for(;tsmall>=0;tsmall--){
-      if(g_n[tsmall]==23) break;
-    }
-    
-    if(tsmall-t23<1) break;
-    
-
-    g_swap(t23,tsmall);
-
-    t23++;
-    tsmall--;
-  }
-
-  // step 2: sort small packets in decreasing order of n (bubble sort is enough)
-  for(i=t23;i<grouped;i++){
-    for(j=i+1;j<grouped;j++){
-      if(g_n[j]>g_n[i]){
-        g_swap(i,j);
-      }
-    }
-  }
-
-  // we need to know how many packets need 23 iterations, how many 22...
-  for(i=0;i<=23;i++){
-    alive[i]=0;
-  }
-  // count
-  alive[23-1]=t23;
-  for(i=t23;i<grouped;i++){
-    alive[g_n[i]-1]++;
-  }
-  // integrate
-  for(i=22;i>=0;i--){
-    alive[i]+=alive[i+1];
-  }
-
-  // choose key
-  if(group_ev_od==0){
-    k=&keys.even;
-  }
-  else{
-    k=&keys.odd;
-  }
-
-  //INIT
-#define INITIALIZE_UNUSED_INPUT
-#ifdef INITIALIZE_UNUSED_INPUT
-// unnecessary zeroing.
-// without this, we operate on uninitialized memory
-// when grouped<GROUP_PARALLELISM, but it's not a problem,
-// as final results will be discarded.
-// random data makes debugging sessions difficult.
-  for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
-#else
-#endif
-
-  for(g=0;g<grouped;g++){
-    encp[g]=g_pkt[g];
-    encp[g]+=g_offset[g]; // skip header
-    FFTABLEIN(stream_in,g,encp[g]);
-  }
-//dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
-
-
-  // ITER 0
-  iter=0;
-  stream_cypher_group_init(k->iA_g,k->iB_g,stream_in);
-  // fill first ib
-  for(g=0;g<alive[iter];g++){
-    COPY_8_BY(ib+8*g,encp[g]);
-  }
-  // ITER 1..N-1
-  for (iter=1;iter<23&&alive[iter-1]>0;iter++){
-    // alive and just dead packets: calc block
-    block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
-    // all packets (dead too): calc stream
-    stream_cypher_group_normal(stream_out);
-//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
-
-    // alive packets: calc ib
-    for(g=0;g<alive[iter];g++){
-      FFTABLEOUT(ib+8*g,stream_out,g);
-// XOREQ8BY gcc bug? 2x4 ok, 8 ko    UPDATE: result ok but speed 1-2% slower (!!!???)
-#if 1
-      XOREQ_4_BY(ib+8*g,encp[g]+8);
-      XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
-#else
-      XOREQ_8_BY(ib+8*g,encp[g]+8);
-#endif
-    }
-    // alive packets: decrypt data
-    for(g=0;g<alive[iter];g++){
-      XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
-    }
-    // just dead packets: write decrypted data
-    for(g=alive[iter];g<alive[iter-1];g++){
-      COPY_8_BY(encp[g],block_out+8*g);
-    }
-    // just dead packets: decrypt residue
-    for(g=alive[iter];g<alive[iter-1];g++){
-      FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
-    }
-    // alive packets: pointers++
-    for(g=0;g<alive[iter];g++) encp[g]+=8;
-  };
-  // ITER N
-  iter=23;
-  // calc block
-  block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
-  // just dead packets: write decrypted data
-  for(g=alive[iter];g<alive[iter-1];g++){
-    COPY_8_BY(encp[g],block_out+8*g);
-  }
-  // no residue possible
-  // so do nothing
-
-
-  M_EMPTY(); // restore CPU multimedia state
-
-  return advanced;
-}
diff --git a/contrib/sasc-ng/FFdecsa/tmp_autogenerated_stuff_stream.c b/contrib/sasc-ng/FFdecsa/tmp_autogenerated_stuff_stream.c
deleted file mode 100644
index cb8ef63..0000000
--- a/contrib/sasc-ng/FFdecsa/tmp_autogenerated_stuff_stream.c
+++ /dev/null
@@ -1,814 +0,0 @@
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004  fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-
-// define statics only once, when STREAM_INIT
-#ifdef STREAM_INIT
-static group A[32+10][4]; // 32 because we will move back (virtual shift register)
-static group B[32+10][4]; // 32 because we will move back (virtual shift register)
-static group X[4];
-static group Y[4];
-static group Z[4];
-static group D[4];
-static group E[4];
-static group F[4];
-static group p;
-static group q;
-static group r;
-
-static inline void trasp64_32_88ccw(unsigned char *data){
-/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define row ((unsigned int *)data)
-  int i,j;
-  for(j=0;j<64;j+=32){
-    unsigned int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff)      | ((b           )<<16);
-      row[j+16+i]=((t           )>>16) |  (b&0xffff0000) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-      row[j+8+i] =((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]   =((t&0x0f0f0f0f)<<4) |  (b&0x0f0f0f0f);
-      row[j+4+i] = (t&0xf0f0f0f0)     | ((b&0xf0f0f0f0)>>4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]   =((t&0x33333333)<<2) |  (b&0x33333333);
-      row[j+2+i] = (t&0xcccccccc)     | ((b&0xcccccccc)>>2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]   =((t&0x55555555)<<1) |  (b&0x55555555);
-      row[j+1+i] = (t&0xaaaaaaaa)     | ((b&0xaaaaaaaa)>>1);
-    }
-  }
-#undef row
-}
-
-static inline void trasp64_32_88cw(unsigned char *data){
-/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define row ((unsigned int *)data)
-  int i,j;
-  for(j=0;j<64;j+=32){
-    unsigned int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff)      | ((b           )<<16);
-      row[j+16+i]=((t           )>>16) |  (b&0xffff0000) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff)     | ((b&0x00ff00ff)<<8);
-      row[j+8+i] =((t&0xff00ff00)>>8) |  (b&0xff00ff00);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]  =((t&0xf0f0f0f0)>>4) |   (b&0xf0f0f0f0);
-      row[j+4+i]= (t&0x0f0f0f0f)     |  ((b&0x0f0f0f0f)<<4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]  =((t&0xcccccccc)>>2) |  (b&0xcccccccc);
-      row[j+2+i]= (t&0x33333333)     | ((b&0x33333333)<<2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]  =((t&0xaaaaaaaa)>>1) |  (b&0xaaaaaaaa);
-      row[j+1+i]= (t&0x55555555)     | ((b&0x55555555)<<1);
-    }
-  }
-#undef row
-}
-
-//64-64----------------------------------------------------------
-static inline void trasp64_64_88ccw(unsigned char *data){
-/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define row ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=row[j+i];
-      b=row[j+32+i];
-      row[j+i]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      row[j+32+i]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]   =((t&0x0f0f0f0f0f0f0f0fULL)<<4) |  (b&0x0f0f0f0f0f0f0f0fULL);
-      row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL)     | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]   =((t&0x3333333333333333ULL)<<2) |  (b&0x3333333333333333ULL);
-      row[j+2+i] = (t&0xccccccccccccccccULL)     | ((b&0xccccccccccccccccULL)>>2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]   =((t&0x5555555555555555ULL)<<1) |  (b&0x5555555555555555ULL);
-      row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL)     | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
-    }
-  }
-#undef row
-}
-
-static inline void trasp64_64_88cw(unsigned char *data){
-/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define row ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=row[j+i];
-      b=row[j+32+i];
-      row[j+i]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      row[j+32+i]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=row[j+i];
-      b=row[j+16+i];
-      row[j+i]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=row[j+i];
-      b=row[j+8+i];
-      row[j+i]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=row[j+i];
-      b=row[j+4+i];
-      row[j+i]   =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) |   (b&0xf0f0f0f0f0f0f0f0ULL);
-      row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL)     |  ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=row[j+i];
-      b=row[j+2+i];
-      row[j+i]   =((t&0xccccccccccccccccULL)>>2) |  (b&0xccccccccccccccccULL);
-      row[j+2+i] = (t&0x3333333333333333ULL)     | ((b&0x3333333333333333ULL)<<2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=row[j+i];
-      b=row[j+1+i];
-      row[j+i]   =((t&0xaaaaaaaaaaaaaaaaULL)>>1) |  (b&0xaaaaaaaaaaaaaaaaULL);
-      row[j+1+i] = (t&0x5555555555555555ULL)     | ((b&0x5555555555555555ULL)<<1);
-    }
-  }
-#undef row
-}
-
-//64-128----------------------------------------------------------
-static inline void trasp64_128_88ccw(unsigned char *data){
-/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define halfrow ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+32+i)];
-      halfrow[2*(j+i)]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+32+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)+1]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+16+i)];
-      halfrow[2*(j+i)]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+16+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+8+i)];
-      halfrow[2*(j+i)]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+8+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+4+i)];
-      halfrow[2*(j+i)]   =((t&0x0f0f0f0f0f0f0f0fULL)<<4) |  (b&0x0f0f0f0f0f0f0f0fULL);
-      halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL)     | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+4+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0x0f0f0f0f0f0f0f0fULL)<<4) |  (b&0x0f0f0f0f0f0f0f0fULL);
-      halfrow[2*(j+4+i)+1] = (t&0xf0f0f0f0f0f0f0f0ULL)     | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+2+i)];
-      halfrow[2*(j+i)]   =((t&0x3333333333333333ULL)<<2) |  (b&0x3333333333333333ULL);
-      halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL)     | ((b&0xccccccccccccccccULL)>>2);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+2+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0x3333333333333333ULL)<<2) |  (b&0x3333333333333333ULL);
-      halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL)     | ((b&0xccccccccccccccccULL)>>2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+1+i)];
-      halfrow[2*(j+i)]   =((t&0x5555555555555555ULL)<<1) |  (b&0x5555555555555555ULL);
-      halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL)     | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+1+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0x5555555555555555ULL)<<1) |  (b&0x5555555555555555ULL);
-      halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL)     | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
-    }
-  }
-#undef halfrow
-}
-
-static inline void trasp64_128_88cw(unsigned char *data){
-/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define halfrow ((unsigned long long int *)data)
-  int i,j;
-  for(j=0;j<64;j+=64){
-    unsigned long long int t,b;
-    for(i=0;i<32;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+32+i)];
-      halfrow[2*(j+i)]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+32+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00000000ffffffffULL)      | ((b                      )<<32);
-      halfrow[2*(j+32+i)+1]=((t                      )>>32) |  (b&0xffffffff00000000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=32){
-    unsigned long long int t,b;
-    for(i=0;i<16;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+16+i)];
-      halfrow[2*(j+i)]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+16+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x0000ffff0000ffffULL)      | ((b&0x0000ffff0000ffffULL)<<16);
-      halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) |  (b&0xffff0000ffff0000ULL) ;
-    }
-  }
-  for(j=0;j<64;j+=16){
-    unsigned long long int t,b;
-    for(i=0;i<8;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+8+i)];
-      halfrow[2*(j+i)]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+8+i)+1];
-      halfrow[2*(j+i)+1]   = (t&0x00ff00ff00ff00ffULL)     | ((b&0x00ff00ff00ff00ffULL)<<8);
-      halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) |  (b&0xff00ff00ff00ff00ULL);
-    }
-  }
-  for(j=0;j<64;j+=8){
-    unsigned long long int t,b;
-    for(i=0;i<4;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+4+i)];
-      halfrow[2*(j+i)]   =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) |   (b&0xf0f0f0f0f0f0f0f0ULL);
-      halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL)     |  ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+4+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) |   (b&0xf0f0f0f0f0f0f0f0ULL);
-      halfrow[2*(j+4+i)+1] = (t&0x0f0f0f0f0f0f0f0fULL)     |  ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
-    }
-  }
-  for(j=0;j<64;j+=4){
-    unsigned long long int t,b;
-    for(i=0;i<2;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+2+i)];
-      halfrow[2*(j+i)]   =((t&0xccccccccccccccccULL)>>2) |  (b&0xccccccccccccccccULL);
-      halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL)     | ((b&0x3333333333333333ULL)<<2);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+2+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0xccccccccccccccccULL)>>2) |  (b&0xccccccccccccccccULL);
-      halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL)     | ((b&0x3333333333333333ULL)<<2);
-    }
-  }
-  for(j=0;j<64;j+=2){
-    unsigned long long int t,b;
-    for(i=0;i<1;i++){
-      t=halfrow[2*(j+i)];
-      b=halfrow[2*(j+1+i)];
-      halfrow[2*(j+i)]   =((t&0xaaaaaaaaaaaaaaaaULL)>>1) |  (b&0xaaaaaaaaaaaaaaaaULL);
-      halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL)     | ((b&0x5555555555555555ULL)<<1);
-      t=halfrow[2*(j+i)+1];
-      b=halfrow[2*(j+1+i)+1];
-      halfrow[2*(j+i)+1]   =((t&0xaaaaaaaaaaaaaaaaULL)>>1) |  (b&0xaaaaaaaaaaaaaaaaULL);
-      halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL)     | ((b&0x5555555555555555ULL)<<1);
-    }
-  }
-#undef halfrow
-}
-#endif
-
-
-#ifdef STREAM_INIT
-void stream_cypher_group_init(
-  group         iA[8][4], // [In]  iA00,iA01,...iA73 32 groups  | Derived from key.
-  group         iB[8][4], // [In]  iB00,iB01,...iB73 32 groups  | Derived from key.
-  unsigned char *sb)      // [In]  (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
-#endif
-#ifdef STREAM_NORMAL
-void stream_cypher_group_normal(
-  unsigned char *cb)    // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
-#endif
-{
-#ifdef STREAM_INIT
-  group in1[4];
-  group in2[4];
-#endif
-  group extra_B[4];
-  group fa,fb,fc,fd,fe;
-  group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
-  group next_E[4];
-  group tmp0,tmp1,tmp2,tmp3,tmp4;
-#ifdef STREAM_INIT
-  group *sb_g=(group *)sb;
-#endif
-#ifdef STREAM_NORMAL
-  group *cb_g=(group *)cb;
-#endif
-  int aboff;
-  int i,j,k,b;
-
-#ifdef STREAM_INIT
-#endif
-#ifdef STREAM_NORMAL
-#endif
-#ifdef STREAM_INIT
-
-#if GROUP_PARALLELISM==32
-trasp64_32_88ccw(sb);
-#endif
-#if GROUP_PARALLELISM==64
-trasp64_64_88ccw(sb);
-#endif
-#if GROUP_PARALLELISM==128
-trasp64_128_88ccw(sb);
-#endif
-
-#endif
-
-  aboff=32;
-
-#ifdef STREAM_INIT
-  // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
-  // load last  32 bits of ck into B[aboff+0]..B[aboff+7]
-  // all other regs = 0
-  for(i=0;i<8;i++){
-    for(b=0;b<4;b++){
-      A[aboff+i][b]=iA[i][b];
-      B[aboff+i][b]=iB[i][b];
-    }
-  }
-  for(b=0;b<4;b++){
-    A[aboff+8][b]=FF0();
-    A[aboff+9][b]=FF0();
-    B[aboff+8][b]=FF0();
-    B[aboff+9][b]=FF0();
-  }
-  for(b=0;b<4;b++){
-    X[b]=FF0();
-    Y[b]=FF0();
-    Z[b]=FF0();
-    D[b]=FF0();
-    E[b]=FF0();
-    F[b]=FF0();
-  }
-  p=FF0();
-  q=FF0();
-  r=FF0();
-#endif
-
-
-////////////////////////////////////////////////////////////////////////////////
-
-  // EXTERNAL LOOP - 8 bytes per operation
-  for(i=0;i<8;i++){
-
-
-#ifdef STREAM_INIT
-    for(b=0;b<4;b++){
-      in1[b]=sb_g[8*i+4+b];
-      in2[b]=sb_g[8*i+b];
-    }
-#endif
-
-    // INTERNAL LOOP - 2 bits per iteration
-    for(j=0; j<4; j++){
-
-
-      // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
-      // 5 bits input per s-box, 2 bits output per s-box
-
-      // we can select bits with zero masking and shifting operations
-      // and synthetize s-boxes with optimized boolean functions.
-      // this is the actual reason we do all the crazy transposition
-      // stuff to switch between normal and bit slice representations.
-      // this code really flies.
-
-      fe=A[aboff+3][0];fa=A[aboff+0][2];fb=A[aboff+5][1];fc=A[aboff+6][3];fd=A[aboff+8][0];
-/* 1000 1110  1110 0001   : lev  7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
-/* 1110 0010  0011 0011   : lev  6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
-/* 0011 0110  1000 1101   : lev  5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
-/* 0101 0101  1001 0011   : lev  5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
-/* 1000 1110  1110 0001   : lev  7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
-/* 1110 0010  0011 0011   : lev  6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
-/* 0011 0110  1000 1101   : lev  5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
-/* 0101 0101  1001 0011   : lev  5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
-      s1a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s1b=FFXOR(tmp2,FFAND(fe,tmp3));
-//dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
-//dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
-//dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
-//dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
-//dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
-
-      fe=A[aboff+1][1];fa=A[aboff+2][2];fb=A[aboff+5][3];fc=A[aboff+6][0];fd=A[aboff+8][1];
-/* 1001 1110  0110 0001   : lev  6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
-/* 0000 0011  0111 1011   : lev  5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
-/* 1100 0110  1101 0010   : lev  6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
-/* 0001 1110  1111 0101   : lev  5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
-/* 1001 1110  0110 0001   : lev  6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
-/* 0000 0011  0111 1011   : lev  5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
-/* 1100 0110  1101 0010   : lev  6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
-/* 0001 1110  1111 0101   : lev  5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
-      s2a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s2b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=A[aboff+0][3];fa=A[aboff+1][0];fb=A[aboff+4][1];fc=A[aboff+4][3];fd=A[aboff+5][2];
-/* 0100 1011  1001 0110   : lev  5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
-/* 1101 0101  1000 1100   : lev  7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
-/* 0010 0111  1101 1000   : lev  4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
-/* 1111 1111  1111 1111   : lev  0: */ //tmp3=ALL_ONES;
-/* 0100 1011  1001 0110   : lev  5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
-/* 1101 0101  1000 1100   : lev  7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
-/* 0010 0111  1101 1000   : lev  4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
-/* 1111 1111  1111 1111   : lev  0: */ tmp3=FF1();
-      s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
-      s3b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=A[aboff+2][3];fa=A[aboff+0][1];fb=A[aboff+1][3];fc=A[aboff+3][2];fd=A[aboff+7][0];
-/* 1011 0101  0100 1001   : lev  7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
-/* 0010 1101  0110 0110   : lev  6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
-/* 0110 0111  1101 0000   : lev  7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
-/* 1111 1111  1111 1111   : lev  0: */ //tmp3=ALL_ONES;
-/* 1011 0101  0100 1001   : lev  7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
-/* 0010 1101  0110 0110   : lev  6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
-/* 0110 0111  1101 0000   : lev  7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
-/* 1111 1111  1111 1111   : lev  0: */ tmp3=FF1();
-      s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
-      s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
-
-      fe=A[aboff+4][2];fa=A[aboff+3][3];fb=A[aboff+5][0];fc=A[aboff+7][1];fd=A[aboff+8][2];
-/* 1000 1111  0011 0010   : lev  7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
-/* 0110 1011  0000 1011   : lev  6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
-/* 0001 1010  0111 1001   : lev  6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
-/* 0101 1101  1101 0101   : lev  4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
-/* 1000 1111  0011 0010   : lev  7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
-/* 0110 1011  0000 1011   : lev  6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
-/* 0001 1010  0111 1001   : lev  6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
-/* 0101 1101  1101 0101   : lev  4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
-      s5a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s5b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=A[aboff+2][1];fa=A[aboff+3][1];fb=A[aboff+4][0];fc=A[aboff+6][2];fd=A[aboff+8][3];
-/* 0011 0110  0010 1101   : lev  6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
-/* 1110 1110  1011 1011   : lev  3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
-/* 0101 1000  0110 0111   : lev  6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
-/* 0001 0011  0000 0001   : lev  5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
-/* 0011 0110  0010 1101   : lev  6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
-/* 1110 1110  1011 1011   : lev  3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
-/* 0101 1000  0110 0111   : lev  6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
-/* 0001 0011  0000 0001   : lev  5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
-      s6a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s6b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-      fe=A[aboff+1][2];fa=A[aboff+2][0];fb=A[aboff+6][1];fc=A[aboff+7][2];fd=A[aboff+7][3];
-/* 0111 1000  1001 0110   : lev  5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
-/* 0100 1001  0101 1011   : lev  6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
-/* 0100 1001  1011 1001   : lev  5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
-/* 1111 1111  1101 1101   : lev  3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
-/* 0111 1000  1001 0110   : lev  5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
-/* 0100 1001  0101 1011   : lev  6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
-/* 0100 1001  1011 1001   : lev  5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
-/* 1111 1111  1101 1101   : lev  3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
-      s7a=FFXOR(tmp0,FFAND(fe,tmp1));
-      s7b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-
-/*
-      we have just done this:
-      
-      int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
-      int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
-      int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
-      int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
-      int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
-      int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
-      int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
-
-      s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
-           |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
-      s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
-           |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
-      s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
-           |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
-      s7 =  sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
-*/
-
-      // use 4x4 xor to produce extra nibble for T3
-
-      extra_B[3]=FFXOR(FFXOR(FFXOR(B[aboff+2][0],B[aboff+5][1]),B[aboff+6][2]),B[aboff+8][3]);
-      extra_B[2]=FFXOR(FFXOR(FFXOR(B[aboff+5][0],B[aboff+7][1]),B[aboff+2][3]),B[aboff+3][2]);
-      extra_B[1]=FFXOR(FFXOR(FFXOR(B[aboff+4][3],B[aboff+7][2]),B[aboff+3][0]),B[aboff+4][1]);
-      extra_B[0]=FFXOR(FFXOR(FFXOR(B[aboff+8][2],B[aboff+5][3]),B[aboff+2][1]),B[aboff+7][0]);
-
-      // T1 = xor all inputs
-      // in1, in2, D are only used in T1 during initialisation, not generation
-      for(b=0;b<4;b++){
-        A[aboff-1][b]=FFXOR(A[aboff+9][b],X[b]);
-      }
-
-#ifdef STREAM_INIT
-      for(b=0;b<4;b++){
-        A[aboff-1][b]=FFXOR(FFXOR(A[aboff-1][b],D[b]),((j % 2) ? in2[b] : in1[b]));
-      }
-#endif
-
-
-      // T2 =  xor all inputs
-      // in1, in2 are only used in T1 during initialisation, not generation
-      // if p=0, use this, if p=1, rotate the result left
-      for(b=0;b<4;b++){
-        B[aboff-1][b]=FFXOR(FFXOR(B[aboff+6][b],B[aboff+9][b]),Y[b]);
-      }
-
-#ifdef STREAM_INIT
-      for(b=0;b<4;b++){
-        B[aboff-1][b]=FFXOR(B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
-      }
-#endif
-
-
-      // if p=1, rotate left (yes, this is what we're doing)
-      tmp3=B[aboff-1][3];
-      B[aboff-1][3]=FFXOR(B[aboff-1][3],FFAND(FFXOR(B[aboff-1][3],B[aboff-1][2]),p));
-      B[aboff-1][2]=FFXOR(B[aboff-1][2],FFAND(FFXOR(B[aboff-1][2],B[aboff-1][1]),p));
-      B[aboff-1][1]=FFXOR(B[aboff-1][1],FFAND(FFXOR(B[aboff-1][1],B[aboff-1][0]),p));
-      B[aboff-1][0]=FFXOR(B[aboff-1][0],FFAND(FFXOR(B[aboff-1][0],tmp3),p));
-
-
-      // T3 = xor all inputs
-      for(b=0;b<4;b++){
-        D[b]=FFXOR(FFXOR(E[b],Z[b]),extra_B[b]);
-      }
-
-
-      // T4 = sum, carry of Z + E + r
-      for(b=0;b<4;b++){
-        next_E[b]=F[b];
-      }
-
-      tmp0=FFXOR(Z[0],E[0]);
-      tmp1=FFAND(Z[0],E[0]);
-      F[0]=FFXOR(E[0],FFAND(q,FFXOR(Z[0],r)));
-      tmp3=FFAND(tmp0,r);
-      tmp4=FFOR(tmp1,tmp3);
-
-      tmp0=FFXOR(Z[1],E[1]);
-      tmp1=FFAND(Z[1],E[1]);
-      F[1]=FFXOR(E[1],FFAND(q,FFXOR(Z[1],tmp4)));
-      tmp3=FFAND(tmp0,tmp4);
-      tmp4=FFOR(tmp1,tmp3);
-
-      tmp0=FFXOR(Z[2],E[2]);
-      tmp1=FFAND(Z[2],E[2]);
-      F[2]=FFXOR(E[2],FFAND(q,FFXOR(Z[2],tmp4)));
-      tmp3=FFAND(tmp0,tmp4);
-      tmp4=FFOR(tmp1,tmp3);
-
-      tmp0=FFXOR(Z[3],E[3]);
-      tmp1=FFAND(Z[3],E[3]);
-      F[3]=FFXOR(E[3],FFAND(q,FFXOR(Z[3],tmp4)));
-      tmp3=FFAND(tmp0,tmp4);
-      r=FFXOR(r,FFAND(q,FFXOR(FFOR(tmp1,tmp3),r))); // ultimate carry
-
-/*
-      we have just done this: (believe it or not)
-      
-      if (q) {
-        F = Z + E + r;
-        r = (F >> 4) & 1;
-        F = F & 0x0f;
-      }
-      else {
-          F = E;
-      }
-*/
-      for(b=0;b<4;b++){
-        E[b]=next_E[b];
-      }
-
-      // this simple instruction is virtually shifting all the shift registers
-      aboff--;
-
-/*
-      we've just done this:
-
-      A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
-      B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
-*/
-
-      X[0]=s1a;
-      X[1]=s2a;
-      X[2]=s3b;
-      X[3]=s4b;
-      Y[0]=s3a;
-      Y[1]=s4a;
-      Y[2]=s5b;
-      Y[3]=s6b;
-      Z[0]=s5a;
-      Z[1]=s6a;
-      Z[2]=s1b;
-      Z[3]=s2b;
-      p=s7a;
-      q=s7b;
-
-#ifdef STREAM_NORMAL
-      // require 4 loops per output byte
-      // 2 output bits are a function of the 4 bits of D
-      // xor 2 by 2
-      cb_g[8*i+7-2*j]=FFXOR(D[2],D[3]);
-      cb_g[8*i+6-2*j]=FFXOR(D[0],D[1]);
-#endif
-
-
-    } // INTERNAL LOOP
-
-
-  } // EXTERNAL LOOP
-
-  // move 32 steps forward, ready for next call
-  for(k=0;k<10;k++){
-    for(b=0;b<4;b++){
-      A[32+k][b]=A[k][b];
-      B[32+k][b]=B[k][b];
-    }
-  }
-
-
-////////////////////////////////////////////////////////////////////////////////
-
-#ifdef STREAM_NORMAL
-
-#if GROUP_PARALLELISM==32
-trasp64_32_88cw(cb);
-#endif
-#if GROUP_PARALLELISM==64
-trasp64_64_88cw(cb);
-#endif
-#if GROUP_PARALLELISM==128
-trasp64_128_88cw(cb);
-#endif
-
-#endif
-
-#ifdef STREAM_INIT
-#endif
-#ifdef STREAM_NORMAL
-#endif
-
-}
-
diff --git a/contrib/sasc-ng/FFdecsa/vdr_patches/README_vdr.txt b/contrib/sasc-ng/FFdecsa/vdr_patches/README_vdr.txt
deleted file mode 100644
index 389d05c..0000000
--- a/contrib/sasc-ng/FFdecsa/vdr_patches/README_vdr.txt
+++ /dev/null
@@ -1,58 +0,0 @@
--------
-FFdecsa
--------
-
-This directory contains patches to use FFdecsa with vdr, by means of a
-new FFdecsa-based SoftCSA.
-
-You don't need a SoftCSA patch!!!
-
-Step by step instructions:
-
-- create a directory somewhere, we will call this dir $BASE
-
-- download vdr-1.3.11.tar.bz2 and put it in $BASE
-
-- download vdr-sc-0.3.15.tar.gz and put it in $BASE
-
-- download FFdecsa-1.0.0.tar.bz2 and put it in $BASE
-
-- cd $BASE
-
-- tar xvjf vdr-1.3.11.tar.bz2
-
-- cd vdr-1.3.11/PLUGINS/src/
-
-- tar xvzf ../../../vdr-sc-0.3.15.tar.gz
-
-- ln -s sc-0.3.15 sc
-
-- cd $BASE/vdr-1.3.11
-
-- tar xvjf ../FFdecsa-1.0.0.tar.bz2
-
-- ln -s FFdecsa-1.0.0 FFdecsa
-
-- patch -p1 <PLUGINS/src/sc-0.3.15/patches/vdr-1.3.10-sc.diff
-
-- patch -p1 <FFdecsa/vdr_patches/vdr-1.3.11-FFdecsa.diff
-
-- cd FFdecsa
-
-- optional: edit Makefile
-
-- make
-
-- ./FFdecsa_test
-
-- cd $BASE/vdr-1.3.11
-
-- cp Make.config.template Make.config
-
-- optional: edit Make.config
-
-- make
-
-- make plugins
-
-Good luck!
diff --git a/contrib/sasc-ng/Makefile b/contrib/sasc-ng/Makefile
index 37b0936..615d60c 100644
--- a/contrib/sasc-ng/Makefile
+++ b/contrib/sasc-ng/Makefile
@@ -78,6 +78,7 @@ clean:
 	@-rm -f objs/dload.o
 	@-rm -f $(TOOL)
 	@-rm -f libscanwrap.so
+	@-rm -f FFdecsa/*
 
 module_clean:
 	cd dvbloopback/module && $(MAKE) clean
@@ -93,8 +94,14 @@ else
 	$(MAKE) -C $(SCDIR) $(SCOPTS) CXX=$(CXX) CXXFLAGS="$(SC_FLAGS)" SASC=1 STATIC=1 all
 endif
 
-FFdecsa/FFdecsa.o:
-	 $(MAKE) -C FFdecsa $(FFDECSA_OPTS)
+# Symlink the shared top-level FFdecsa sources (Makefile, *.h, *.c) into ./FFdecsa; abort if cd fails.
+.PHONY: link-FFdecsa
+link-FFdecsa:
+	@cd FFdecsa && \
+	  for i in `find ../../../FFdecsa/ -maxdepth 1 -type f \( -name Makefile -o -iname "*.h" -o -iname "*.c" \)`; do ln -sf $$i `basename $$i`; done
+
+FFdecsa/FFdecsa.o: link-FFdecsa
+	$(MAKE) -C FFdecsa $(FFDECSA_OPTS)
 
 module:
 	cd dvbloopback/module && $(MAKE) $(DVB_MOD_DIR)
@@ -124,7 +131,7 @@ objs/libsi.a: $(OBJ_LIBSI)
 objs/%.o: $(LBDIR)/%.c $(INC_DEPS)
 	$(CXX) $(CXXFLAGS) -o $@ -c  $(DEFINES) -I$(LBDIR) $(INCLUDES) $<
 
-objs/%.o: dvblb_plugins/%.c $(INC_DEPS) $(INC_DEPS_LB)
+objs/%.o: dvblb_plugins/%.c $(INC_DEPS) $(INC_DEPS_LB) | link-FFdecsa # order-only: links must exist, but must not force a rebuild
 	$(CXX) $(CXXFLAGS) -o $@ -c  $(DEFINES) -I$(LBDIR) $(INCLUDES) $<
 
 objs/%.o: sc/%.cpp
diff --git a/contrib/sasc-ng/configure b/contrib/sasc-ng/configure
index ca54092..c8164b6 100644
--- a/contrib/sasc-ng/configure
+++ b/contrib/sasc-ng/configure
@@ -174,9 +174,10 @@ if test "x$ffdecsa_opt" != "xno"; then
    else
      FLAGS=$ffdecsa_flags
    fi
+   FFdecsaDIR="../../FFdecsa"
    TMPOUT="${TMPDIR}/FFdecsa/out"
    mkdir "${TMPDIR}/FFdecsa"
-   cp FFdecsa/*.c FFdecsa/*.h FFdecsa/Makefile "${TMPDIR}/FFdecsa/"
+   cp $FFdecsaDIR/*.c $FFdecsaDIR/*.h $FFdecsaDIR/Makefile "${TMPDIR}/FFdecsa/"
    echo "Trying various FFdecsa optimizations..."
    for var in ${FFDECSA_MODES}; do
      make -C "${TMPDIR}/FFdecsa" FFdecsa_test "PARALLEL_MODE=${var}" "${FLAGS}" "COMPILER=$CXX" >/dev/null 2>&1