标题:有大神能看懂C代码改写的java代码有什么错误吗?
只看楼主
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
结帖率:66.67%
 问题点数:0 回复次数:5 
有大神能看懂C代码改写的java代码有什么错误吗?
如果可以,可以补发代码
搜索更多相关主题的帖子: java 
2016-08-02 15:56
远方的狼A
Rank: 1
等 级:新手上路
威 望:1
帖 子:8
专家分:5
注 册:2016-8-2
得分:0 
你不发出来怎么看呢;
2016-08-02 17:49
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
得分:0 
C代码
int file_chunk_cdc(int fd, vector* features) {
    unsigned char buf[BUF_MAX_SIZE] = {0};
    unsigned char buf_bz[BUF_MAX_SIZE] = {0};
    unsigned char block_buf[BLOCK_MAX_SIZE * 2] = {0};
    unsigned char last_block_buf[BLOCK_MAX_SIZE * 2] = {0};
    char win_buf[BLOCK_WIN_SIZE + 1] = {0};
    unsigned char md5_str[33] = {0};
    unsigned char adler_pre_char;
    unsigned char md5_checksum[32 + 1] = {0};
    unsigned int bpos = 0;
    unsigned int rwsize = 0, bzsize = 0;
    unsigned int exp_rwsize = BUF_MAX_SIZE;
    unsigned int head, tail;
    unsigned int block_sz = 0, old_block_sz = 0;
    unsigned int hkey = 0;
    int ret = 0;

    feature_t f = 0;
    while(rwsize = read(fd, buf + bpos, exp_rwsize))
    {
        /* last chunk */
        
        if ((rwsize + bpos + block_sz) < BLOCK_MIN_SIZE){
            break;
        }

        head = 0;
        tail = bpos + rwsize;
        /* avoid unnecessary computation and comparsion */
        if (block_sz < (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE))
        {
            old_block_sz = block_sz;
            block_sz = ((block_sz + tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : block_sz + tail -head;  
            memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
            head += (block_sz - old_block_sz);
        }

        while ((head + BLOCK_WIN_SIZE) <= tail)
        {
            memcpy(win_buf, buf + head, BLOCK_WIN_SIZE);
            /*
             * Firstly, i think rabinhash is the best. However, it's performance is very bad.
             * After some testing, i found ELF_hash is better both on performance and dedup rate.
             * So, EFL_hash is default. Now, adler_hash as default.
             */
            if (g_rolling_hash)
            {
                hkey = (block_sz == (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ? adler32_checksum(win_buf, BLOCK_WIN_SIZE) :
                    adler32_rolling_checksum(hkey, BLOCK_WIN_SIZE, adler_pre_char, buf[head+BLOCK_WIN_SIZE-1]);
            }
            else
                hkey = g_cdc_chunk_hashfunc(win_buf);

            /* get a normal chunk */
            if ((hkey % g_block_size) == CHUNK_CDC_R)
            {
                memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SIZE);
                head += BLOCK_WIN_SIZE;
                block_sz += BLOCK_WIN_SIZE;
                if (block_sz >= BLOCK_MIN_SIZE)
                {
                    md5(block_buf, block_sz, md5_checksum);
                    f = md5_2_feature(md5_checksum);
                    VEC_PUSH_BACK(features, &f);
                    /*
                    if (0 != (ret = dedup_regfile_block_process(block_buf, block_sz,
                        md5_checksum, fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
                    {
                        perror("dedup_reggile_block_process in file_chunk_cdc");
                        goto _FILE_CHUNK_CDC_EXIT;
                    }
                    */
                    block_sz = 0;
                }
            }
            else
            {
                block_buf[block_sz++] = buf[head++];
                /* get an abnormal chunk */
                if (block_sz >= BLOCK_MAX_SIZE)
                {
                    md5(block_buf, block_sz, md5_checksum);
                    f = md5_2_feature(md5_checksum);
                    VEC_PUSH_BACK(features, &f);
                    /*
                    if (0 != (ret = dedup_regfile_block_process(block_buf, block_sz,
                        md5_checksum, fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
                    {
                        perror("dedup_reggile_block_process in file_chunk_cdc");
                        goto _FILE_CHUNK_CDC_EXIT;
                    }
                    */
                    block_sz = 0;
                }
            }

            /* avoid unnecessary computation and comparsion */
            if (block_sz == 0)
            {
                block_sz = ((tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : tail - head;
                memcpy(block_buf, buf + head, block_sz);
                head = ((tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    head + (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE) : tail;
            }

            adler_pre_char = buf[head -1];
        }

        /* read expected data from file to full up buf */
        bpos = tail - head;
        exp_rwsize = BUF_MAX_SIZE - bpos;
        adler_pre_char = buf[head -1];
        memmove(buf, buf + head, bpos);
    }
    /* last chunk */

    int last_block_len = ((rwsize + bpos + block_sz) >= 0) ? rwsize + bpos + block_sz : 0;
    if (last_block_len > 0)
    {
        memcpy(last_block_buf, block_buf, block_sz);
        memcpy(last_block_buf + block_sz, buf, rwsize + bpos);

        md5(last_block_buf, last_block_len, md5_checksum);
        f = md5_2_feature(md5_checksum);
        VEC_PUSH_BACK(features, &f);
    }

_FILE_CHUNK_CDC_EXIT:
    return 0;
}


/* slide block chunk */
int file_chunk_sb(int fd, vector* features) {
    char buf[BUF_MAX_SIZE] = {0};
    char buf_bz[BUF_MAX_SIZE] = {0};
    char win_buf[BLOCK_MAX_SIZE * 2] = {0};
    char block_buf[BLOCK_MAX_SIZE * 2] = {0};
    char adler_pre_char;
    unsigned char md5_checksum[32 + 1] = {0};
    unsigned char md5_checksum1[32 + 1] = {0};
    char crc_checksum[16] = {0};
    unsigned int bpos = 0;
    unsigned int slide_sz = 0;
    unsigned int rwsize = 0, bzsize = 0, bzsize_f = 0;
    unsigned int exp_rwsize = BUF_MAX_SIZE;
    unsigned int head, tail;
    unsigned int hkey = 0;
    unsigned int bflag = 0;
    int ret = 0;

    hashtable* sb_htable = create_hashtable(g_htab_bucket_nr);
    hashtable* sb_htable_crc = create_hashtable(g_htab_bucket_nr);
    if (NULL == sb_htable_crc || sb_htable == NULL)
        return -1;

    feature_t f, f1;
    while(rwsize = read(fd, buf + bpos, exp_rwsize)) {
        /* last chunk */
        /*
        if ((rwsize + bpos + slide_sz) < g_block_size)
            break;
        */
        head = 0;
        tail = bpos + rwsize;
        while ((head + g_block_size) <= tail) {
            memcpy(win_buf, buf + head, g_block_size);
            hkey = (slide_sz == 0) ? adler32_checksum(win_buf, g_block_size) :
                adler32_rolling_checksum(hkey, g_block_size, adler_pre_char, buf[head+g_block_size-1]);

            uint_2_str(hkey, crc_checksum);

            /* bflag: 0, both CRC and MD5 are not idenitical
                      1, both CRC and MD5 are identical
                  2, CRC is identical and MD5 is not
             */
            bflag = 0;

            /* this block maybe is duplicate */
            bzsize = g_block_size;
            if (hash_exist(sb_htable_crc, crc_checksum))
            {   
                bflag = 2;
                md5((unsigned char*)win_buf, bzsize, md5_checksum);
                f = md5_2_feature(md5_checksum);
                md5_2_str(md5_checksum);
                if (hash_exist(sb_htable, (char*)md5_checksum))
                {
                    /* insert fragment */
                    if (slide_sz != 0)
                    {
                        md5((unsigned char*)block_buf, slide_sz, md5_checksum1);
                        f1 = md5_2_feature(md5_checksum1);
                        VEC_PUSH_BACK(features, &f1);
                          
                        /*  
                        if (0 != (ret = dedup_regfile_block_process(block_buf, slide_sz, md5_checksum1,
                            fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
                        {
                            perror("dedup_regfile_block_process in file_chunk_sb");
                            goto _FILE_CHUNK_SB_EXIT;
                        }
                        */
                    }

                    VEC_PUSH_BACK(features, &f);
                    /* insert fixed-size block */
                    /*
                    if (0 != (ret = dedup_regfile_block_process(win_buf, bzsize, md5_checksum,
                        fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
                    {
                        perror("dedup_regfile_block_process in file_chunk_sb");
                        goto _FILE_CHUNK_SB_EXIT;
                    }
                    */

                    head += g_block_size;
                    slide_sz = 0;
                    bflag = 1;
                }
            }

            /* this block is not duplicate */
            if (bflag != 1)
            {
                block_buf[slide_sz++] = buf[head++];
                if (slide_sz == g_block_size)
                {
                    bzsize = g_block_size;

                    /* calculate checksum and check in */
                    hkey = adler32_checksum(block_buf, bzsize);
                    uint_2_str(hkey, crc_checksum);
                    hash_checkin(sb_htable_crc, crc_checksum);

                    md5((unsigned char*)block_buf, bzsize, md5_checksum);
                    f = md5_2_feature(md5_checksum);
                    VEC_PUSH_BACK(features, &f);
                    /*
                    if (0 != (ret = dedup_regfile_block_process(block_buf, bzsize, md5_checksum,
                        fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
                    {
                        perror("dedup_regfile_block_process in file_chunk_sb");
                        goto _FILE_CHUNK_SB_EXIT;
                    }
                    */

                    slide_sz = 0;
                }
            }

            adler_pre_char = buf[head - 1];
        }

        /* read expected data from file to full up buf */
        bpos = tail - head;
        exp_rwsize = BUF_MAX_SIZE - bpos;
        adler_pre_char = buf[head - 1];
        memmove(buf, buf + head, bpos);
    }
    /* last chunk */
    /*
    *last_block_len = ((rwsize + bpos + slide_sz) > 0) ? rwsize + bpos + slide_sz : 0;
    if (*last_block_len > 0)
    {
        memcpy(last_block_buf, block_buf, slide_sz);
        memcpy(last_block_buf + slide_sz, buf, rwsize + bpos);
    }
    */
_FILE_CHUNK_SB_EXIT:
    lseek(fd, 0, SEEK_SET);
    hash_free(sb_htable);
    hash_free(sb_htable_crc);
    return 0;
}

java代码

package cn.edu.cust.deduple;

import
import
import
import
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import cn.edu.cust.deduple.utils.Checksum;
import cn.edu.cust.deduple.utils.MapUtils;
import cn.edu.cust.deduple.utils.Md5Util;

/**
 * @ClassName: CDC
 * @Description:
 * @author: 王大鸟
 * @date: 2016/5/18
 */
public class CDC {
    /**
     * rabin指纹之除数
     */
    private static final int D = 5;
    /**
     * rabin指纹之商
     */
    private static final int F = 3;
    /**
     * 最大值
     */
    private static final int MAX = 8 * 1024;
    /**
     * 最小值
     */
    private static final int MIN = 4 * 1024;
    /**
     * 滑动窗口大小
     */
    private static final int WINLEN = 4 * 1024;

    private static Map<String, Long> fingerPrints = new HashMap<String, Long>();
   
    static final int BUF_MAX_SZ = 128 * 1024;
    static final int BLOCK_MAX_SZ = 4096;
    static final int BLOCK_WIN_SZ = 32;
    static final int BLOCK_MIN_SZ = 64;
   
    static final int BLOCK_SZ = 4096;
    static final int CHUNK_CDC_R = 13;
   
    FileChunks fileChunk(File f) throws IOException {
        byte buf[] = new byte[BUF_MAX_SZ];
        byte block_buf[] = new byte[BLOCK_MAX_SZ + BLOCK_WIN_SZ];
        byte win_buf[] = new byte[BLOCK_WIN_SZ + 1];
        byte adler_pre_char = 0;
        //unsigned char md5_checksum[16 + 1] = {0};
        //unsigned char csum[10 + 1] = {0};
        int bpos = 0;
        int rwsize = 0;
        int exp_rwsize = BUF_MAX_SZ;
        int head, tail;
        int block_sz = 0, old_block_sz = 0;
        long hkey = 0;
        //chunk_block_entry chunk_bentry;
        long offset = 0;

        FileInputStream fin = new FileInputStream(f);
        FileChannel fc = fin.getChannel();
        ByteBuffer bb = ByteBuffer.wrap(buf, bpos, exp_rwsize);
        FileChunks fcs = new FileChunks();
        
        while((rwsize = fc.read(bb)) >= 0) {
            /* last chunk */
            System.out.println("rwsize:" + rwsize);
            if ((rwsize + bpos + block_sz) < BLOCK_MIN_SZ)
                break;

            head = 0;
            tail = bpos + rwsize;
            /* avoid unnecessary computation and comparsion */
            if (block_sz < (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) {
                old_block_sz = block_sz;
                block_sz = ((block_sz + tail - head) > (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ?
                    BLOCK_MIN_SZ - BLOCK_WIN_SZ : block_sz + tail -head;
                System.arraycopy(buf, head, block_buf, old_block_sz, block_sz - old_block_sz);
                //memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
                head += (block_sz - old_block_sz);
            }

            while ((head + BLOCK_WIN_SZ) <= tail) {
                System.arraycopy(buf, head, win_buf, 0, BLOCK_WIN_SZ);
                //memcpy(win_buf, buf + head, BLOCK_WIN_SZ);
                hkey = (block_sz == (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ? Checksum.adler32_checksum(win_buf, BLOCK_WIN_SZ) :
                    Checksum.adler32_rolling_checksum((int)hkey, BLOCK_WIN_SZ, adler_pre_char, buf[head+BLOCK_WIN_SZ-1]);
                //System.out.println("hkey:" + (hkey % BLOCK_SZ));
                /* get a normal chunk, write block info to chunk file */
                if ((hkey % BLOCK_SZ) == CHUNK_CDC_R) {
                    //System.out.println(block_sz + BLOCK_WIN_SZ);
                    System.arraycopy(buf, head, block_buf, block_sz, BLOCK_WIN_SZ);
                    //memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SZ);
                    head += BLOCK_WIN_SZ;
                    block_sz += BLOCK_WIN_SZ;
                    if(block_sz > BLOCK_MAX_SZ){
                        System.out.println(">4096:" + block_sz);
                    }
                    if (block_sz >= BLOCK_MIN_SZ) {
                        fcs.addChunk(offset, block_sz, block_buf);
                        /*md5(block_buf, block_sz, md5_checksum);
                        uint_2_str(adler32_checksum(block_buf, block_sz), csum);
                        chunk_file_hdr->block_nr++;
                        chunk_bentry.len = block_sz;
                        chunk_bentry.offset = offset;
                        memcpy(chunk_bentry.md5, md5_checksum, 16 + 1);
                        memcpy(chunk_bentry.csum, csum, 10 + 1);
                        rwsize = write(fd_chunk, &chunk_bentry, CHUNK_BLOCK_ENTRY_SZ);
                        if (rwsize == -1 || rwsize != CHUNK_BLOCK_ENTRY_SZ)
                            return -1;*/
                        offset += block_sz;
                        block_sz = 0;
                    }
                } else {
                    block_buf[block_sz++] = buf[head++];
                    /* get an abnormal chunk, write block info to chunk file */
                    if (block_sz >= BLOCK_MAX_SZ) {
                        fcs.addChunk(offset, block_sz, block_buf);
                        /*md5(block_buf, block_sz, md5_checksum);
                        uint_2_str(adler32_checksum(block_buf, block_sz), csum);
                        chunk_file_hdr->block_nr++;
                        chunk_bentry.len = block_sz;
                        chunk_bentry.offset = offset;
                        memcpy(chunk_bentry.md5, md5_checksum, 16+1);
                        memcpy(chunk_bentry.csum, csum, 10 + 1);
                        rwsize = write(fd_chunk, &chunk_bentry, CHUNK_BLOCK_ENTRY_SZ);
                        if (rwsize == -1 || rwsize != CHUNK_BLOCK_ENTRY_SZ)
                            return -1;*/
                        offset += block_sz;
                        block_sz = 0;
                    }
                }

                /* avoid unnecessary computation and comparsion */
                if (block_sz == 0) {
                    block_sz = ((tail - head) > (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ?
                        BLOCK_MIN_SZ - BLOCK_WIN_SZ : tail - head;
                    System.arraycopy(buf, head, block_buf, 0, block_sz);
                    //memcpy(block_buf, buf + head, block_sz);
                    head = ((tail - head) > (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ?
                        head + (BLOCK_MIN_SZ - BLOCK_WIN_SZ) : tail;
                }
                adler_pre_char = buf[head - 1];
            }

            /* read expected data from file to full up buf */
            bpos = tail - head;
            exp_rwsize = BUF_MAX_SZ - bpos;
            adler_pre_char = buf[head - 1];
            System.arraycopy(buf, head, buf, 0, bpos);
            //memmove(buf, buf + head, bpos);
            bb = ByteBuffer.wrap(buf, bpos, exp_rwsize);
        }

        fin.close();
        return fcs;
        /*if (rwsize == -1)
            return -1;

        return 0;*/
    }

    /**
     * CDC算法实现
     *
     * @param file
     * @throws IOException
     * @throws NoSuchAlgorithmException
     */
    public void process(File file) throws IOException, NoSuchAlgorithmException {
        byte[] win_buf = new byte[WINLEN];
        long win_tail = 0;
        String fingerPrint = null;
        byte[] buf = new byte[MAX];
        RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
        long fileLength = randomAccessFile.length();

        while (randomAccessFile.read(win_buf) > 0) {
            long filePointer = randomAccessFile.getFilePointer();
            //如果大于最大值
            if (filePointer - win_tail > MAX) {
                randomAccessFile.seek(win_tail);
                randomAccessFile.read(buf);
                fingerPrint = Md5Util.getMD5(buf);
                MapUtils.judgeKey(fingerPrints, fingerPrint);
                win_tail = filePointer;
                continue;
            }
            //满足rabin指纹
            if ((Arrays.hashCode(win_buf) % D == F) && !((filePointer - win_tail) < MIN)) {
                offset(randomAccessFile, filePointer, win_tail);
                win_tail = filePointer;
            } else if (filePointer == fileLength) {
                offset(randomAccessFile, filePointer, win_tail);
                break;
            } else {
                randomAccessFile.seek(filePointer - (4 * 1024) + 1);
            }
        }
    }

    /**
     * 重复代码封装
     *
     * @param randomAccessFile
     * @param filePointer
     * @param win_tail
     * @throws IOException
     * @throws NoSuchAlgorithmException
     */
    public void offset(RandomAccessFile randomAccessFile, long filePointer, long win_tail) throws IOException, NoSuchAlgorithmException {
        byte[] tmpbytes = new byte[Integer.parseInt(String.valueOf(filePointer - win_tail))];
        randomAccessFile.seek(win_tail);
        randomAccessFile.read(tmpbytes);
        String fingerPrint = Md5Util.getMD5(tmpbytes);
        MapUtils.judgeKey(fingerPrints, fingerPrint);
    }
   
    public static void main(String[] args) {
        try{
            CDC cdc = new CDC();
            FileChunks fcs = cdc.fileChunk(new File("/home/qh/下载/SPAS data set/2.txt"));
            //fcs.printChunks();
        }catch(IOException ex){
            ex.printStackTrace();
        }
    }
}
2016-08-02 19:23
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
得分:0 
回复 楼主 hyj0616
已发,请帮忙!
2016-08-02 19:24
砖家的谎言
Rank: 12Rank: 12Rank: 12
等 级:禁止访问
威 望:30
帖 子:693
专家分:3898
注 册:2013-12-6
得分:0 
嗯,发到我们群里面帮你看看

我不是砖家,要努力成为砖家。
2016-08-06 09:55
仍去寻酒
Rank: 2
来 自:火星
等 级:论坛游民
威 望:1
帖 子:14
专家分:27
注 册:2016-7-16
得分:0 
握草,真长,,好久没看c 表示不懂

不断的发现以前的自己是个傻逼 ,这就是人生
2016-08-24 04:33



参与讨论请移步原网站贴子:https://bbs.bccn.net/thread-467453-1-1.html




关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.132152 second(s), 7 queries.
Copyright©2004-2025, BCCN.NET, All Rights Reserved