标题:C版本要迁移到Java版本!
只看楼主
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
结帖率:66.67%
 问题点数:0 回复次数:0 
C版本要迁移到Java版本!
/*
 * Read from `fd` into `dst` until `want` bytes have been stored or read()
 * reports end of file.  A single read() may legitimately return fewer bytes
 * than requested, so it is retried until the request is satisfied.
 *
 * Returns the number of bytes stored (less than `want` only at end of file),
 * or -1 if read() fails.
 */
static long cdc_read_full(int fd, unsigned char *dst, unsigned int want)
{
    unsigned int got = 0;

    while (got < want)
    {
        long n = read(fd, dst + got, want - got);

        if (n < 0)
            return -1;
        if (n == 0)
            break;
        got += (unsigned int)n;
    }
    return (long)got;
}

/*
 * Hash `len` bytes at `data` with md5(), convert the digest with
 * md5_2_feature() and append the resulting feature to `features`.
 */
static void cdc_emit_chunk(vector *features, unsigned char *data, unsigned int len)
{
    /* Same size as the digest buffer used by the original inline code. */
    unsigned char md5_checksum[32 + 1] = {0};
    feature_t f = 0;

    md5(data, len, md5_checksum);
    f = md5_2_feature(md5_checksum);
    VEC_PUSH_BACK(features, &f);
}

/*
 * Split the data read from `fd` into chunks and append one feature per chunk
 * to `features`.
 *
 * A chunk ends when the hash of the current BLOCK_WIN_SIZE-byte window
 * satisfies (hkey % g_block_size) == CHUNK_CDC_R and the chunk then holds at
 * least BLOCK_MIN_SIZE bytes, or when the chunk grows to BLOCK_MAX_SIZE bytes
 * without such a match.  Whatever is left when the input ends is emitted as a
 * final chunk.
 *
 * fd:       descriptor to read from (read sequentially from its current offset).
 * features: vector that receives one feature_t per chunk.
 *
 * Returns 0 on success, or -1 if read() fails.  Features appended before the
 * failure stay in `features`.
 */
int file_chunk_cdc(int fd, vector* features) {
    unsigned char buf[BUF_MAX_SIZE] = {0};
    unsigned char block_buf[BLOCK_MAX_SIZE * 2] = {0};
    unsigned char last_block_buf[BLOCK_MAX_SIZE * 2] = {0};
    char win_buf[BLOCK_WIN_SIZE + 1] = {0};
    unsigned char adler_pre_char = 0;
    unsigned int bpos = 0;                    /* bytes carried over at the start of buf */
    unsigned int rwsize = 0;                  /* bytes obtained by the latest read */
    unsigned int exp_rwsize = BUF_MAX_SIZE;   /* free space in buf after the carry-over */
    unsigned int head, tail;
    unsigned int block_sz = 0, old_block_sz = 0;
    unsigned int last_block_len;
    unsigned int hkey = 0;

    for (;;)
    {
        long nread = cdc_read_full(fd, buf + bpos, exp_rwsize);

        /* A failed read must not be mistaken for a (huge) byte count. */
        if (nread < 0)
            return -1;

        rwsize = (unsigned int)nread;
        if (rwsize == 0)
            break;

        /* Too little data left for a full chunk: handle it as the last chunk. */
        if ((rwsize + bpos + block_sz) < BLOCK_MIN_SIZE)
            break;

        head = 0;
        tail = bpos + rwsize;

        /*
         * Avoid unnecessary computation and comparison: bytes below
         * BLOCK_MIN_SIZE - BLOCK_WIN_SIZE are copied into the chunk without
         * being hashed.
         */
        if (block_sz < (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE))
        {
            old_block_sz = block_sz;
            block_sz = ((block_sz + tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : block_sz + tail - head;
            memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
            head += (block_sz - old_block_sz);
        }

        while ((head + BLOCK_WIN_SIZE) <= tail)
        {
            memcpy(win_buf, buf + head, BLOCK_WIN_SIZE);

            /*
             * Original author's note: rabinhash looked best at first but
             * performed badly; ELF_hash tested better on both speed and dedup
             * rate and became the default; adler_hash is the default now.
             */
            if (g_rolling_hash)
            {
                /*
                 * The first window of a chunk is hashed from scratch; later
                 * windows update the previous hash with the byte that left the
                 * window and the byte that entered it.
                 */
                hkey = (block_sz == (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    adler32_checksum(win_buf, BLOCK_WIN_SIZE) :
                    adler32_rolling_checksum(hkey, BLOCK_WIN_SIZE, adler_pre_char,
                                             buf[head + BLOCK_WIN_SIZE - 1]);
            }
            else
            {
                hkey = g_cdc_chunk_hashfunc(win_buf);
            }

            if ((hkey % g_block_size) == CHUNK_CDC_R)
            {
                /* Normal chunk: the window matched, take the whole window. */
                memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SIZE);
                head += BLOCK_WIN_SIZE;
                block_sz += BLOCK_WIN_SIZE;
                if (block_sz >= BLOCK_MIN_SIZE)
                {
                    cdc_emit_chunk(features, block_buf, block_sz);
                    block_sz = 0;
                }
            }
            else
            {
                /* No match: move one byte into the chunk and slide the window. */
                block_buf[block_sz++] = buf[head++];

                /* Abnormal chunk: cut at the size limit. */
                if (block_sz >= BLOCK_MAX_SIZE)
                {
                    cdc_emit_chunk(features, block_buf, block_sz);
                    block_sz = 0;
                }
            }

            /*
             * A chunk was just emitted: pre-fill the next one up to
             * BLOCK_MIN_SIZE - BLOCK_WIN_SIZE bytes without hashing them.
             */
            if (block_sz == 0)
            {
                block_sz = ((tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : tail - head;
                memcpy(block_buf, buf + head, block_sz);
                head = ((tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    head + (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE) : tail;
            }

            adler_pre_char = buf[head - 1];
        }

        /*
         * Move the unconsumed tail to the front of buf and ask the next read
         * to fill the rest.  The byte before the tail is saved first because
         * memmove() overwrites it and the rolling update still needs it.
         */
        bpos = tail - head;
        exp_rwsize = BUF_MAX_SIZE - bpos;
        adler_pre_char = buf[head - 1];
        memmove(buf, buf + head, bpos);
    }

    /* Last chunk: the partially built chunk plus whatever is still in buf. */
    last_block_len = rwsize + bpos + block_sz;
    if (last_block_len > 0)
    {
        memcpy(last_block_buf, block_buf, block_sz);
        memcpy(last_block_buf + block_sz, buf, rwsize + bpos);
        cdc_emit_chunk(features, last_block_buf, last_block_len);
    }

    return 0;
}

搜索更多相关主题的帖子: features Java 
2016-09-01 14:19



参与讨论请移步原网站贴子:https://bbs.bccn.net/thread-468362-1-1.html




关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 1.665792 second(s), 7 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved